diff options
86 files changed, 927 insertions, 268 deletions
@@ -211,7 +211,8 @@ packetdrill-tests: load-packetdrill .PHONY: packetdrill-tests packetimpact-tests: load-packetimpact - @sudo modprobe iptable_filter ip6table_filter + @sudo modprobe iptable_filter + @sudo modprobe ip6table_filter @$(call submake,install-test-runtime RUNTIME="packetimpact") @$(call submake,test-runtime OPTIONS="--jobs=HOST_CPUS*3 --local_test_jobs=HOST_CPUS*3" RUNTIME="packetimpact" TARGETS="$(shell $(MAKE) query TARGETS='attr(tags, packetimpact, tests(//...))')") .PHONY: packetimpact-tests diff --git a/images/Makefile b/images/Makefile index d183155a8..d4b6524ba 100644 --- a/images/Makefile +++ b/images/Makefile @@ -61,7 +61,7 @@ local_image = $(LOCAL_IMAGE_PREFIX)/$(subst _,/,$(1)) # conflicting with the bazel "build" terminology, which is used elsewhere. rebuild-%: FROM=$(shell grep FROM $(call path,$*)/Dockerfile | cut -d' ' -f2) rebuild-%: register-cross - $(foreach IMAGE,$(FROM),docker pull $(DOCKER_PLATFORM_ARGS) $(IMAGE) &&) true && \ + $(foreach IMAGE,$(FROM),docker pull $(DOCKER_PLATFORM_ARGS) $(IMAGE) &&) \ T=$$(mktemp -d) && cp -a $(call path,$*)/* $$T && \ docker build $(DOCKER_PLATFORM_ARGS) -t $(call remote_image,$*) $$T && \ rm -rf $$T @@ -73,10 +73,10 @@ pull-%: docker pull $(DOCKER_PLATFORM_ARGS) $(call remote_image,$*) # load will either pull the "remote" or build it locally. This is the preferred -# entrypoint, as it should never file. The local tag should always be set after +# entrypoint, as it should never fail. The local tag should always be set after # this returns (either by the pull or the build). load-%: - docker inspect $(call remote_image,$*) >/dev/null 2>&1 || $(MAKE) pull-$* || $(MAKE) rebuild-$* + $(MAKE) pull-$* || $(MAKE) rebuild-$* docker tag $(call remote_image,$*) $(call local_image,$*) # push pushes the remote image, after either pulling (to validate that the tag diff --git a/images/packetdrill/Dockerfile b/images/packetdrill/Dockerfile index 01296dbaf..b4cd73006 100644 --- a/images/packetdrill/Dockerfile +++ b/images/packetdrill/Dockerfile @@ -1,8 +1,8 @@ FROM ubuntu:bionic RUN apt-get update && apt-get install -y net-tools git iptables iputils-ping \ netcat tcpdump jq tar bison flex make +# Pick up updated git. RUN hash -r RUN git clone --depth 1 --branch packetdrill-v2.0 \ https://github.com/google/packetdrill.git RUN cd packetdrill/gtests/net/packetdrill && ./configure && make -CMD /bin/bash diff --git a/images/packetimpact/Dockerfile b/images/packetimpact/Dockerfile index 82b7e8abd..906d5cdd6 100644 --- a/images/packetimpact/Dockerfile +++ b/images/packetimpact/Dockerfile @@ -16,5 +16,3 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ qemu-system-x86 \ # sha1sum to generate entropy. libdigest-sha-perl -RUN hash -r -CMD /bin/bash diff --git a/pkg/abi/linux/BUILD b/pkg/abi/linux/BUILD index b5c5cc20b..cdcaa8c73 100644 --- a/pkg/abi/linux/BUILD +++ b/pkg/abi/linux/BUILD @@ -74,9 +74,9 @@ go_library( "//pkg/abi", "//pkg/binary", "//pkg/bits", + "//pkg/marshal", + "//pkg/marshal/primitive", "//pkg/usermem", - "//tools/go_marshal/marshal", - "//tools/go_marshal/primitive", ], ) diff --git a/pkg/abi/linux/netfilter.go b/pkg/abi/linux/netfilter.go index 91e35366f..6ef91b402 100644 --- a/pkg/abi/linux/netfilter.go +++ b/pkg/abi/linux/netfilter.go @@ -17,9 +17,9 @@ package linux import ( "io" + "gvisor.dev/gvisor/pkg/marshal" + "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/usermem" - "gvisor.dev/gvisor/tools/go_marshal/marshal" - "gvisor.dev/gvisor/tools/go_marshal/primitive" ) // This file contains structures required to support netfilter, specifically diff --git a/pkg/abi/linux/netfilter_ipv6.go b/pkg/abi/linux/netfilter_ipv6.go index f6117024c..347c8c591 100644 --- a/pkg/abi/linux/netfilter_ipv6.go +++ b/pkg/abi/linux/netfilter_ipv6.go @@ -17,9 +17,9 @@ package linux import ( "io" + "gvisor.dev/gvisor/pkg/marshal" + "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/usermem" - "gvisor.dev/gvisor/tools/go_marshal/marshal" - "gvisor.dev/gvisor/tools/go_marshal/primitive" ) // This file contains structures required to support IPv6 netfilter and diff --git a/tools/go_marshal/marshal/BUILD b/pkg/marshal/BUILD index 4aec98218..4aec98218 100644 --- a/tools/go_marshal/marshal/BUILD +++ b/pkg/marshal/BUILD diff --git a/tools/go_marshal/marshal/marshal.go b/pkg/marshal/marshal.go index 85b196f08..85b196f08 100644 --- a/tools/go_marshal/marshal/marshal.go +++ b/pkg/marshal/marshal.go diff --git a/tools/go_marshal/marshal/marshal_impl_util.go b/pkg/marshal/marshal_impl_util.go index 89c7d3575..89c7d3575 100644 --- a/tools/go_marshal/marshal/marshal_impl_util.go +++ b/pkg/marshal/marshal_impl_util.go diff --git a/tools/go_marshal/primitive/BUILD b/pkg/marshal/primitive/BUILD index cc08ba63a..06741e6d1 100644 --- a/tools/go_marshal/primitive/BUILD +++ b/pkg/marshal/primitive/BUILD @@ -12,7 +12,7 @@ go_library( "//:sandbox", ], deps = [ + "//pkg/marshal", "//pkg/usermem", - "//tools/go_marshal/marshal", ], ) diff --git a/tools/go_marshal/primitive/primitive.go b/pkg/marshal/primitive/primitive.go index d93edda8b..140a9b78c 100644 --- a/tools/go_marshal/primitive/primitive.go +++ b/pkg/marshal/primitive/primitive.go @@ -19,8 +19,8 @@ package primitive import ( "io" + "gvisor.dev/gvisor/pkg/marshal" "gvisor.dev/gvisor/pkg/usermem" - "gvisor.dev/gvisor/tools/go_marshal/marshal" ) // Int8 is a marshal.Marshallable implementation for int8. diff --git a/pkg/safemem/BUILD b/pkg/safemem/BUILD index ce30382ab..68ed074f8 100644 --- a/pkg/safemem/BUILD +++ b/pkg/safemem/BUILD @@ -11,9 +11,7 @@ go_library( "seq_unsafe.go", ], visibility = ["//:sandbox"], - deps = [ - "//pkg/safecopy", - ], + deps = ["//pkg/safecopy"], ) go_test( diff --git a/pkg/sentry/arch/BUILD b/pkg/sentry/arch/BUILD index 901e0f320..dc8f0135d 100644 --- a/pkg/sentry/arch/BUILD +++ b/pkg/sentry/arch/BUILD @@ -33,11 +33,11 @@ go_library( "//pkg/context", "//pkg/cpuid", "//pkg/log", + "//pkg/marshal", "//pkg/sentry/limits", "//pkg/sync", "//pkg/syserror", "//pkg/usermem", - "//tools/go_marshal/marshal", ], ) diff --git a/pkg/sentry/arch/signal_act.go b/pkg/sentry/arch/signal_act.go index 32173aa20..d3e2324a8 100644 --- a/pkg/sentry/arch/signal_act.go +++ b/pkg/sentry/arch/signal_act.go @@ -14,7 +14,7 @@ package arch -import "gvisor.dev/gvisor/tools/go_marshal/marshal" +import "gvisor.dev/gvisor/pkg/marshal" // Special values for SignalAct.Handler. const ( diff --git a/pkg/sentry/arch/signal_stack.go b/pkg/sentry/arch/signal_stack.go index 0fa738a1d..a1eae98f9 100644 --- a/pkg/sentry/arch/signal_stack.go +++ b/pkg/sentry/arch/signal_stack.go @@ -17,8 +17,8 @@ package arch import ( + "gvisor.dev/gvisor/pkg/marshal" "gvisor.dev/gvisor/pkg/usermem" - "gvisor.dev/gvisor/tools/go_marshal/marshal" ) const ( diff --git a/pkg/sentry/fs/host/BUILD b/pkg/sentry/fs/host/BUILD index 42a6c41c2..1368014c4 100644 --- a/pkg/sentry/fs/host/BUILD +++ b/pkg/sentry/fs/host/BUILD @@ -32,6 +32,7 @@ go_library( "//pkg/fdnotifier", "//pkg/iovec", "//pkg/log", + "//pkg/marshal/primitive", "//pkg/refs", "//pkg/safemem", "//pkg/secio", @@ -55,7 +56,6 @@ go_library( "//pkg/unet", "//pkg/usermem", "//pkg/waiter", - "//tools/go_marshal/primitive", ], ) diff --git a/pkg/sentry/fs/host/socket_unsafe.go b/pkg/sentry/fs/host/socket_unsafe.go index 5d4f312cf..c8231e0aa 100644 --- a/pkg/sentry/fs/host/socket_unsafe.go +++ b/pkg/sentry/fs/host/socket_unsafe.go @@ -65,10 +65,10 @@ func fdReadVec(fd int, bufs [][]byte, control []byte, peek bool, maxlen int64) ( controlTrunc = msg.Flags&syscall.MSG_CTRUNC == syscall.MSG_CTRUNC if n > length { - return length, n, msg.Controllen, controlTrunc, err + return length, n, msg.Controllen, controlTrunc, nil } - return n, n, msg.Controllen, controlTrunc, err + return n, n, msg.Controllen, controlTrunc, nil } // fdWriteVec sends from bufs to fd. diff --git a/pkg/sentry/fs/host/tty.go b/pkg/sentry/fs/host/tty.go index 87d56a51d..1183727ab 100644 --- a/pkg/sentry/fs/host/tty.go +++ b/pkg/sentry/fs/host/tty.go @@ -17,6 +17,7 @@ package host import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" @@ -24,7 +25,6 @@ import ( "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" - "gvisor.dev/gvisor/tools/go_marshal/primitive" ) // LINT.IfChange diff --git a/pkg/sentry/fs/proc/task.go b/pkg/sentry/fs/proc/task.go index 9cf7f2a62..103bfc600 100644 --- a/pkg/sentry/fs/proc/task.go +++ b/pkg/sentry/fs/proc/task.go @@ -604,7 +604,7 @@ func (s *statusData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ( var vss, rss, data uint64 s.t.WithMuLocked(func(t *kernel.Task) { if fdTable := t.FDTable(); fdTable != nil { - fds = fdTable.Size() + fds = fdTable.CurrentMaxFDs() } if mm := t.MemoryManager(); mm != nil { vss = mm.VirtualMemorySize() diff --git a/pkg/sentry/fs/tty/BUILD b/pkg/sentry/fs/tty/BUILD index fdd5a40d5..e6d0eb359 100644 --- a/pkg/sentry/fs/tty/BUILD +++ b/pkg/sentry/fs/tty/BUILD @@ -17,6 +17,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/marshal/primitive", "//pkg/refs", "//pkg/safemem", "//pkg/sentry/arch", @@ -31,7 +32,6 @@ go_library( "//pkg/syserror", "//pkg/usermem", "//pkg/waiter", - "//tools/go_marshal/primitive", ], ) diff --git a/pkg/sentry/fs/tty/master.go b/pkg/sentry/fs/tty/master.go index bebf90ffa..b91184b1b 100644 --- a/pkg/sentry/fs/tty/master.go +++ b/pkg/sentry/fs/tty/master.go @@ -17,6 +17,7 @@ package tty import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" @@ -25,7 +26,6 @@ import ( "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" - "gvisor.dev/gvisor/tools/go_marshal/primitive" ) // LINT.IfChange diff --git a/pkg/sentry/fs/tty/queue.go b/pkg/sentry/fs/tty/queue.go index e070a1b71..79975d812 100644 --- a/pkg/sentry/fs/tty/queue.go +++ b/pkg/sentry/fs/tty/queue.go @@ -17,6 +17,7 @@ package tty import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" @@ -24,7 +25,6 @@ import ( "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" - "gvisor.dev/gvisor/tools/go_marshal/primitive" ) // LINT.IfChange diff --git a/pkg/sentry/fs/tty/replica.go b/pkg/sentry/fs/tty/replica.go index cb6cd6864..385d230fb 100644 --- a/pkg/sentry/fs/tty/replica.go +++ b/pkg/sentry/fs/tty/replica.go @@ -17,6 +17,7 @@ package tty import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" @@ -24,7 +25,6 @@ import ( "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" - "gvisor.dev/gvisor/tools/go_marshal/primitive" ) // LINT.IfChange diff --git a/pkg/sentry/fs/tty/terminal.go b/pkg/sentry/fs/tty/terminal.go index c9dbf1f3b..4f431d74d 100644 --- a/pkg/sentry/fs/tty/terminal.go +++ b/pkg/sentry/fs/tty/terminal.go @@ -17,10 +17,10 @@ package tty import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/tools/go_marshal/primitive" ) // LINT.IfChange diff --git a/pkg/sentry/fsimpl/devpts/BUILD b/pkg/sentry/fsimpl/devpts/BUILD index ac48ab34b..48e13613a 100644 --- a/pkg/sentry/fsimpl/devpts/BUILD +++ b/pkg/sentry/fsimpl/devpts/BUILD @@ -30,6 +30,8 @@ go_library( "//pkg/abi/linux", "//pkg/context", "//pkg/log", + "//pkg/marshal", + "//pkg/marshal/primitive", "//pkg/refs", "//pkg/safemem", "//pkg/sentry/arch", @@ -43,8 +45,6 @@ go_library( "//pkg/syserror", "//pkg/usermem", "//pkg/waiter", - "//tools/go_marshal/marshal", - "//tools/go_marshal/primitive", ], ) diff --git a/pkg/sentry/fsimpl/devpts/master.go b/pkg/sentry/fsimpl/devpts/master.go index d07e1ded8..83d790b38 100644 --- a/pkg/sentry/fsimpl/devpts/master.go +++ b/pkg/sentry/fsimpl/devpts/master.go @@ -17,6 +17,7 @@ package devpts import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" @@ -27,7 +28,6 @@ import ( "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" - "gvisor.dev/gvisor/tools/go_marshal/primitive" ) // masterInode is the inode for the master end of the Terminal. diff --git a/pkg/sentry/fsimpl/devpts/queue.go b/pkg/sentry/fsimpl/devpts/queue.go index ca36b66e9..55bff3e60 100644 --- a/pkg/sentry/fsimpl/devpts/queue.go +++ b/pkg/sentry/fsimpl/devpts/queue.go @@ -17,6 +17,7 @@ package devpts import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" @@ -24,7 +25,6 @@ import ( "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" - "gvisor.dev/gvisor/tools/go_marshal/primitive" ) // waitBufMaxBytes is the maximum size of a wait buffer. It is based on diff --git a/pkg/sentry/fsimpl/devpts/replica.go b/pkg/sentry/fsimpl/devpts/replica.go index 1f99f4b4d..58f6c1d3a 100644 --- a/pkg/sentry/fsimpl/devpts/replica.go +++ b/pkg/sentry/fsimpl/devpts/replica.go @@ -17,6 +17,7 @@ package devpts import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" @@ -26,7 +27,6 @@ import ( "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" - "gvisor.dev/gvisor/tools/go_marshal/primitive" ) // replicaInode is the inode for the replica end of the Terminal. diff --git a/pkg/sentry/fsimpl/devpts/terminal.go b/pkg/sentry/fsimpl/devpts/terminal.go index 731955d62..510bd6d89 100644 --- a/pkg/sentry/fsimpl/devpts/terminal.go +++ b/pkg/sentry/fsimpl/devpts/terminal.go @@ -17,9 +17,9 @@ package devpts import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/tools/go_marshal/primitive" ) // Terminal is a pseudoterminal. diff --git a/pkg/sentry/fsimpl/fuse/BUILD b/pkg/sentry/fsimpl/fuse/BUILD index 53a4f3012..999c16bfd 100644 --- a/pkg/sentry/fsimpl/fuse/BUILD +++ b/pkg/sentry/fsimpl/fuse/BUILD @@ -42,6 +42,7 @@ go_library( "//pkg/abi/linux", "//pkg/context", "//pkg/log", + "//pkg/marshal", "//pkg/refs", "//pkg/sentry/fsimpl/devtmpfs", "//pkg/sentry/fsimpl/kernfs", @@ -52,7 +53,6 @@ go_library( "//pkg/syserror", "//pkg/usermem", "//pkg/waiter", - "//tools/go_marshal/marshal", "@org_golang_x_sys//unix:go_default_library", ], ) @@ -64,6 +64,7 @@ go_test( library = ":fuse", deps = [ "//pkg/abi/linux", + "//pkg/marshal", "//pkg/sentry/fsimpl/testutil", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", @@ -71,6 +72,5 @@ go_test( "//pkg/syserror", "//pkg/usermem", "//pkg/waiter", - "//tools/go_marshal/marshal", ], ) diff --git a/pkg/sentry/fsimpl/fuse/connection.go b/pkg/sentry/fsimpl/fuse/connection.go index 6df2728ab..eb1d1a2b7 100644 --- a/pkg/sentry/fsimpl/fuse/connection.go +++ b/pkg/sentry/fsimpl/fuse/connection.go @@ -24,12 +24,12 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/marshal" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/waiter" - "gvisor.dev/gvisor/tools/go_marshal/marshal" ) // maxActiveRequestsDefault is the default setting controlling the upper bound diff --git a/pkg/sentry/fsimpl/fuse/dev_test.go b/pkg/sentry/fsimpl/fuse/dev_test.go index aedc2fa39..6baf56520 100644 --- a/pkg/sentry/fsimpl/fuse/dev_test.go +++ b/pkg/sentry/fsimpl/fuse/dev_test.go @@ -21,6 +21,7 @@ import ( "testing" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/marshal" "gvisor.dev/gvisor/pkg/sentry/fsimpl/testutil" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" @@ -28,7 +29,6 @@ import ( "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" - "gvisor.dev/gvisor/tools/go_marshal/marshal" ) // echoTestOpcode is the Opcode used during testing. The server used in tests diff --git a/pkg/sentry/fsimpl/host/BUILD b/pkg/sentry/fsimpl/host/BUILD index be1c88c82..56bcf9bdb 100644 --- a/pkg/sentry/fsimpl/host/BUILD +++ b/pkg/sentry/fsimpl/host/BUILD @@ -49,6 +49,7 @@ go_library( "//pkg/fspath", "//pkg/iovec", "//pkg/log", + "//pkg/marshal/primitive", "//pkg/refs", "//pkg/safemem", "//pkg/sentry/arch", @@ -72,7 +73,6 @@ go_library( "//pkg/unet", "//pkg/usermem", "//pkg/waiter", - "//tools/go_marshal/primitive", "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/pkg/sentry/fsimpl/host/socket_unsafe.go b/pkg/sentry/fsimpl/host/socket_unsafe.go index 35ded24bc..c0bf45f08 100644 --- a/pkg/sentry/fsimpl/host/socket_unsafe.go +++ b/pkg/sentry/fsimpl/host/socket_unsafe.go @@ -63,10 +63,10 @@ func fdReadVec(fd int, bufs [][]byte, control []byte, peek bool, maxlen int64) ( controlTrunc = msg.Flags&syscall.MSG_CTRUNC == syscall.MSG_CTRUNC if n > length { - return length, n, msg.Controllen, controlTrunc, err + return length, n, msg.Controllen, controlTrunc, nil } - return n, n, msg.Controllen, controlTrunc, err + return n, n, msg.Controllen, controlTrunc, nil } // fdWriteVec sends from bufs to fd. diff --git a/pkg/sentry/fsimpl/host/tty.go b/pkg/sentry/fsimpl/host/tty.go index 7a9be4b97..97cefa350 100644 --- a/pkg/sentry/fsimpl/host/tty.go +++ b/pkg/sentry/fsimpl/host/tty.go @@ -17,6 +17,7 @@ package host import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/kernel" @@ -25,7 +26,6 @@ import ( "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" - "gvisor.dev/gvisor/tools/go_marshal/primitive" ) // TTYFileDescription implements vfs.FileDescriptionImpl for a host file diff --git a/pkg/sentry/fsimpl/overlay/copy_up.go b/pkg/sentry/fsimpl/overlay/copy_up.go index c589b4746..360b77ef6 100644 --- a/pkg/sentry/fsimpl/overlay/copy_up.go +++ b/pkg/sentry/fsimpl/overlay/copy_up.go @@ -23,6 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" @@ -81,6 +82,8 @@ func (d *dentry) copyUpLocked(ctx context.Context) error { Start: d.parent.upperVD, Path: fspath.Parse(d.name), } + // Used during copy-up of memory-mapped regular files. + var mmapOpts *memmap.MMapOpts cleanupUndoCopyUp := func() { var err error if ftype == linux.S_IFDIR { @@ -136,6 +139,25 @@ func (d *dentry) copyUpLocked(ctx context.Context) error { break } } + d.mapsMu.Lock() + defer d.mapsMu.Unlock() + if d.wrappedMappable != nil { + // We may have memory mappings of the file on the lower layer. + // Switch to mapping the file on the upper layer instead. + mmapOpts = &memmap.MMapOpts{ + Perms: usermem.ReadWrite, + MaxPerms: usermem.ReadWrite, + } + if err := newFD.ConfigureMMap(ctx, mmapOpts); err != nil { + cleanupUndoCopyUp() + return err + } + if mmapOpts.MappingIdentity != nil { + mmapOpts.MappingIdentity.DecRef(ctx) + } + // Don't actually switch Mappables until the end of copy-up; see + // below for why. + } if err := newFD.SetStat(ctx, vfs.SetStatOptions{ Stat: linux.Statx{ Mask: linux.STATX_UID | linux.STATX_GID, @@ -265,6 +287,62 @@ func (d *dentry) copyUpLocked(ctx context.Context) error { atomic.StoreUint64(&d.ino, upperStat.Ino) } + if mmapOpts != nil && mmapOpts.Mappable != nil { + // Note that if mmapOpts != nil, then d.mapsMu is locked for writing + // (from the S_IFREG path above). + + // Propagate mappings of d to the new Mappable. Remember which mappings + // we added so we can remove them on failure. + upperMappable := mmapOpts.Mappable + allAdded := make(map[memmap.MappableRange]memmap.MappingsOfRange) + for seg := d.lowerMappings.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + added := make(memmap.MappingsOfRange) + for m := range seg.Value() { + if err := upperMappable.AddMapping(ctx, m.MappingSpace, m.AddrRange, seg.Start(), m.Writable); err != nil { + for m := range added { + upperMappable.RemoveMapping(ctx, m.MappingSpace, m.AddrRange, seg.Start(), m.Writable) + } + for mr, mappings := range allAdded { + for m := range mappings { + upperMappable.RemoveMapping(ctx, m.MappingSpace, m.AddrRange, mr.Start, m.Writable) + } + } + return err + } + added[m] = struct{}{} + } + allAdded[seg.Range()] = added + } + + // Switch to the new Mappable. We do this at the end of copy-up + // because: + // + // - We need to switch Mappables (by changing d.wrappedMappable) before + // invalidating Translations from the old Mappable (to pick up + // Translations from the new one). + // + // - We need to lock d.dataMu while changing d.wrappedMappable, but + // must invalidate Translations with d.dataMu unlocked (due to lock + // ordering). + // + // - Consequently, once we unlock d.dataMu, other threads may + // immediately observe the new (copied-up) Mappable, which we want to + // delay until copy-up is guaranteed to succeed. + d.dataMu.Lock() + lowerMappable := d.wrappedMappable + d.wrappedMappable = upperMappable + d.dataMu.Unlock() + d.lowerMappings.InvalidateAll(memmap.InvalidateOpts{}) + + // Remove mappings from the old Mappable. + for seg := d.lowerMappings.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + for m := range seg.Value() { + lowerMappable.RemoveMapping(ctx, m.MappingSpace, m.AddrRange, seg.Start(), m.Writable) + } + } + d.lowerMappings.RemoveAll() + } + atomic.StoreUint32(&d.copiedUp, 1) return nil } diff --git a/pkg/sentry/fsimpl/overlay/non_directory.go b/pkg/sentry/fsimpl/overlay/non_directory.go index 268b32537..74cfd3799 100644 --- a/pkg/sentry/fsimpl/overlay/non_directory.go +++ b/pkg/sentry/fsimpl/overlay/non_directory.go @@ -23,6 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -256,10 +257,105 @@ func (fd *nonDirectoryFD) Sync(ctx context.Context) error { // ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap. func (fd *nonDirectoryFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error { - wrappedFD, err := fd.getCurrentFD(ctx) + if err := fd.ensureMappable(ctx, opts); err != nil { + return err + } + return vfs.GenericConfigureMMap(&fd.vfsfd, fd.dentry(), opts) +} + +// ensureMappable ensures that fd.dentry().wrappedMappable is not nil. +func (fd *nonDirectoryFD) ensureMappable(ctx context.Context, opts *memmap.MMapOpts) error { + d := fd.dentry() + + // Fast path if we already have a Mappable for the current top layer. + if atomic.LoadUint32(&d.isMappable) != 0 { + return nil + } + + // Only permit mmap of regular files, since other file types may have + // unpredictable behavior when mmapped (e.g. /dev/zero). + if atomic.LoadUint32(&d.mode)&linux.S_IFMT != linux.S_IFREG { + return syserror.ENODEV + } + + // Get a Mappable for the current top layer. + fd.mu.Lock() + defer fd.mu.Unlock() + d.copyMu.RLock() + defer d.copyMu.RUnlock() + if atomic.LoadUint32(&d.isMappable) != 0 { + return nil + } + wrappedFD, err := fd.currentFDLocked(ctx) if err != nil { return err } - defer wrappedFD.DecRef(ctx) - return wrappedFD.ConfigureMMap(ctx, opts) + if err := wrappedFD.ConfigureMMap(ctx, opts); err != nil { + return err + } + if opts.MappingIdentity != nil { + opts.MappingIdentity.DecRef(ctx) + opts.MappingIdentity = nil + } + // Use this Mappable for all mappings of this layer (unless we raced with + // another call to ensureMappable). + d.mapsMu.Lock() + defer d.mapsMu.Unlock() + d.dataMu.Lock() + defer d.dataMu.Unlock() + if d.wrappedMappable == nil { + d.wrappedMappable = opts.Mappable + atomic.StoreUint32(&d.isMappable, 1) + } + return nil +} + +// AddMapping implements memmap.Mappable.AddMapping. +func (d *dentry) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) error { + d.mapsMu.Lock() + defer d.mapsMu.Unlock() + if err := d.wrappedMappable.AddMapping(ctx, ms, ar, offset, writable); err != nil { + return err + } + if !d.isCopiedUp() { + d.lowerMappings.AddMapping(ms, ar, offset, writable) + } + return nil +} + +// RemoveMapping implements memmap.Mappable.RemoveMapping. +func (d *dentry) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) { + d.mapsMu.Lock() + defer d.mapsMu.Unlock() + d.wrappedMappable.RemoveMapping(ctx, ms, ar, offset, writable) + if !d.isCopiedUp() { + d.lowerMappings.RemoveMapping(ms, ar, offset, writable) + } +} + +// CopyMapping implements memmap.Mappable.CopyMapping. +func (d *dentry) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR usermem.AddrRange, offset uint64, writable bool) error { + d.mapsMu.Lock() + defer d.mapsMu.Unlock() + if err := d.wrappedMappable.CopyMapping(ctx, ms, srcAR, dstAR, offset, writable); err != nil { + return err + } + if !d.isCopiedUp() { + d.lowerMappings.AddMapping(ms, dstAR, offset, writable) + } + return nil +} + +// Translate implements memmap.Mappable.Translate. +func (d *dentry) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) { + d.dataMu.RLock() + defer d.dataMu.RUnlock() + return d.wrappedMappable.Translate(ctx, required, optional, at) +} + +// InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable. +func (d *dentry) InvalidateUnsavable(ctx context.Context) error { + d.mapsMu.Lock() + defer d.mapsMu.Unlock() + return d.wrappedMappable.InvalidateUnsavable(ctx) } diff --git a/pkg/sentry/fsimpl/overlay/overlay.go b/pkg/sentry/fsimpl/overlay/overlay.go index 9a8f7010e..b2efe5f80 100644 --- a/pkg/sentry/fsimpl/overlay/overlay.go +++ b/pkg/sentry/fsimpl/overlay/overlay.go @@ -22,6 +22,10 @@ // filesystem.renameMu // dentry.dirMu // dentry.copyMu +// *** "memmap.Mappable locks" below this point +// dentry.mapsMu +// *** "memmap.Mappable locks taken by Translate" below this point +// dentry.dataMu // // Locking dentry.dirMu in multiple dentries requires that parent dentries are // locked before child dentries, and that filesystem.renameMu is locked to @@ -37,6 +41,7 @@ import ( "gvisor.dev/gvisor/pkg/fspath" fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" @@ -419,6 +424,35 @@ type dentry struct { devMinor uint32 ino uint64 + // If this dentry represents a regular file, then: + // + // - mapsMu is used to synchronize between copy-up and memmap.Mappable + // methods on dentry preceding mm.MemoryManager.activeMu in the lock order. + // + // - dataMu is used to synchronize between copy-up and + // dentry.(memmap.Mappable).Translate. + // + // - lowerMappings tracks memory mappings of the file. lowerMappings is + // used to invalidate mappings of the lower layer when the file is copied + // up to ensure that they remain coherent with subsequent writes to the + // file. (Note that, as of this writing, Linux overlayfs does not do this; + // this feature is a gVisor extension.) lowerMappings is protected by + // mapsMu. + // + // - If this dentry is copied-up, then wrappedMappable is the Mappable + // obtained from a call to the current top layer's + // FileDescription.ConfigureMMap(). Once wrappedMappable becomes non-nil + // (from a call to nonDirectoryFD.ensureMappable()), it cannot become nil. + // wrappedMappable is protected by mapsMu and dataMu. + // + // - isMappable is non-zero iff wrappedMappable is non-nil. isMappable is + // accessed using atomic memory operations. + mapsMu sync.Mutex + lowerMappings memmap.MappingSet + dataMu sync.RWMutex + wrappedMappable memmap.Mappable + isMappable uint32 + locks vfs.FileLocks } diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go index 356036b9b..ce87b0d47 100644 --- a/pkg/sentry/fsimpl/proc/task_files.go +++ b/pkg/sentry/fsimpl/proc/task_files.go @@ -543,7 +543,7 @@ func (s *statusData) Generate(ctx context.Context, buf *bytes.Buffer) error { var vss, rss, data uint64 s.task.WithMuLocked(func(t *kernel.Task) { if fdTable := t.FDTable(); fdTable != nil { - fds = fdTable.Size() + fds = fdTable.CurrentMaxFDs() } if mm := t.MemoryManager(); mm != nil { vss = mm.VirtualMemorySize() diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD index d436daab4..66cf4f207 100644 --- a/pkg/sentry/kernel/BUILD +++ b/pkg/sentry/kernel/BUILD @@ -212,6 +212,7 @@ go_library( "//pkg/eventchannel", "//pkg/fspath", "//pkg/log", + "//pkg/marshal", "//pkg/metric", "//pkg/refs", "//pkg/refs_vfs2", @@ -261,7 +262,6 @@ go_library( "//pkg/tcpip/stack", "//pkg/usermem", "//pkg/waiter", - "//tools/go_marshal/marshal", ], ) diff --git a/pkg/sentry/kernel/fd_table.go b/pkg/sentry/kernel/fd_table.go index 89223fa36..e9c382f82 100644 --- a/pkg/sentry/kernel/fd_table.go +++ b/pkg/sentry/kernel/fd_table.go @@ -111,6 +111,7 @@ func (f *FDTable) saveDescriptorTable() map[int32]descriptor { func (f *FDTable) loadDescriptorTable(m map[int32]descriptor) { ctx := context.Background() f.init() // Initialize table. + f.used = 0 for fd, d := range m { f.setAll(ctx, fd, d.file, d.fileVFS2, d.flags) @@ -186,12 +187,6 @@ func (f *FDTable) DecRef(ctx context.Context) { }) } -// Size returns the number of file descriptor slots currently allocated. -func (f *FDTable) Size() int { - size := atomic.LoadInt32(&f.used) - return int(size) -} - // forEach iterates over all non-nil files in sorted order. // // It is the caller's responsibility to acquire an appropriate lock. @@ -550,30 +545,6 @@ func (f *FDTable) GetFDs(ctx context.Context) []int32 { return fds } -// GetRefs returns a stable slice of references to all files and bumps the -// reference count on each. The caller must use DecRef on each reference when -// they're done using the slice. -func (f *FDTable) GetRefs(ctx context.Context) []*fs.File { - files := make([]*fs.File, 0, f.Size()) - f.forEach(ctx, func(_ int32, file *fs.File, _ *vfs.FileDescription, _ FDFlags) { - file.IncRef() // Acquire a reference for caller. - files = append(files, file) - }) - return files -} - -// GetRefsVFS2 returns a stable slice of references to all files and bumps the -// reference count on each. The caller must use DecRef on each reference when -// they're done using the slice. -func (f *FDTable) GetRefsVFS2(ctx context.Context) []*vfs.FileDescription { - files := make([]*vfs.FileDescription, 0, f.Size()) - f.forEach(ctx, func(_ int32, _ *fs.File, file *vfs.FileDescription, _ FDFlags) { - file.IncRef() // Acquire a reference for caller. - files = append(files, file) - }) - return files -} - // Fork returns an independent FDTable. func (f *FDTable) Fork(ctx context.Context) *FDTable { clone := f.k.NewFDTable() diff --git a/pkg/sentry/kernel/fd_table_unsafe.go b/pkg/sentry/kernel/fd_table_unsafe.go index 555b14f8e..1c977b60f 100644 --- a/pkg/sentry/kernel/fd_table_unsafe.go +++ b/pkg/sentry/kernel/fd_table_unsafe.go @@ -79,6 +79,13 @@ func (f *FDTable) getAll(fd int32) (*fs.File, *vfs.FileDescription, FDFlags, boo return d.file, d.fileVFS2, d.flags, true } +// CurrentMaxFDs returns the number of file descriptors that may be stored in f +// without reallocation. +func (f *FDTable) CurrentMaxFDs() int { + slice := *(*[]unsafe.Pointer)(atomic.LoadPointer(&f.slice)) + return len(slice) +} + // set sets an entry. // // This handles accounting changes, as well as acquiring and releasing the diff --git a/pkg/sentry/socket/BUILD b/pkg/sentry/socket/BUILD index c0fd3425b..a3f775d15 100644 --- a/pkg/sentry/socket/BUILD +++ b/pkg/sentry/socket/BUILD @@ -10,6 +10,7 @@ go_library( "//pkg/abi/linux", "//pkg/binary", "//pkg/context", + "//pkg/marshal", "//pkg/sentry/device", "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", @@ -20,6 +21,5 @@ go_library( "//pkg/syserr", "//pkg/tcpip", "//pkg/usermem", - "//tools/go_marshal/marshal", ], ) diff --git a/pkg/sentry/socket/hostinet/BUILD b/pkg/sentry/socket/hostinet/BUILD index e76e498de..632e33452 100644 --- a/pkg/sentry/socket/hostinet/BUILD +++ b/pkg/sentry/socket/hostinet/BUILD @@ -21,6 +21,8 @@ go_library( "//pkg/context", "//pkg/fdnotifier", "//pkg/log", + "//pkg/marshal", + "//pkg/marshal/primitive", "//pkg/safemem", "//pkg/sentry/arch", "//pkg/sentry/device", @@ -40,8 +42,6 @@ go_library( "//pkg/tcpip/stack", "//pkg/usermem", "//pkg/waiter", - "//tools/go_marshal/marshal", - "//tools/go_marshal/primitive", "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/pkg/sentry/socket/hostinet/socket.go b/pkg/sentry/socket/hostinet/socket.go index 242e6bf76..7d3c4a01c 100644 --- a/pkg/sentry/socket/hostinet/socket.go +++ b/pkg/sentry/socket/hostinet/socket.go @@ -24,6 +24,8 @@ import ( "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fdnotifier" "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/marshal" + "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" @@ -36,8 +38,6 @@ import ( "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" - "gvisor.dev/gvisor/tools/go_marshal/marshal" - "gvisor.dev/gvisor/tools/go_marshal/primitive" ) const ( diff --git a/pkg/sentry/socket/netlink/BUILD b/pkg/sentry/socket/netlink/BUILD index 0546801bf..1f926aa91 100644 --- a/pkg/sentry/socket/netlink/BUILD +++ b/pkg/sentry/socket/netlink/BUILD @@ -16,6 +16,8 @@ go_library( "//pkg/abi/linux", "//pkg/binary", "//pkg/context", + "//pkg/marshal", + "//pkg/marshal/primitive", "//pkg/sentry/arch", "//pkg/sentry/device", "//pkg/sentry/fs", @@ -36,8 +38,6 @@ go_library( "//pkg/tcpip", "//pkg/usermem", "//pkg/waiter", - "//tools/go_marshal/marshal", - "//tools/go_marshal/primitive", ], ) diff --git a/pkg/sentry/socket/netlink/socket.go b/pkg/sentry/socket/netlink/socket.go index 68a9b9a96..5ddcd4be5 100644 --- a/pkg/sentry/socket/netlink/socket.go +++ b/pkg/sentry/socket/netlink/socket.go @@ -21,6 +21,8 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/binary" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/marshal" + "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/device" "gvisor.dev/gvisor/pkg/sentry/fs" @@ -38,8 +40,6 @@ import ( "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" - "gvisor.dev/gvisor/tools/go_marshal/marshal" - "gvisor.dev/gvisor/tools/go_marshal/primitive" ) const sizeOfInt32 int = 4 diff --git a/pkg/sentry/socket/netstack/BUILD b/pkg/sentry/socket/netstack/BUILD index 1fb777a6c..fae3b6783 100644 --- a/pkg/sentry/socket/netstack/BUILD +++ b/pkg/sentry/socket/netstack/BUILD @@ -22,6 +22,8 @@ go_library( "//pkg/binary", "//pkg/context", "//pkg/log", + "//pkg/marshal", + "//pkg/marshal/primitive", "//pkg/metric", "//pkg/safemem", "//pkg/sentry/arch", @@ -51,8 +53,6 @@ go_library( "//pkg/tcpip/transport/udp", "//pkg/usermem", "//pkg/waiter", - "//tools/go_marshal/marshal", - "//tools/go_marshal/primitive", "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index 91790834b..2e568bc3d 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -40,6 +40,8 @@ import ( "gvisor.dev/gvisor/pkg/binary" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/marshal" + "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/metric" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/arch" @@ -62,8 +64,6 @@ import ( "gvisor.dev/gvisor/pkg/tcpip/transport/udp" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" - "gvisor.dev/gvisor/tools/go_marshal/marshal" - "gvisor.dev/gvisor/tools/go_marshal/primitive" ) func mustCreateMetric(name, description string) *tcpip.StatCounter { diff --git a/pkg/sentry/socket/netstack/netstack_vfs2.go b/pkg/sentry/socket/netstack/netstack_vfs2.go index 0f342e655..c0212ad76 100644 --- a/pkg/sentry/socket/netstack/netstack_vfs2.go +++ b/pkg/sentry/socket/netstack/netstack_vfs2.go @@ -18,6 +18,8 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/amutex" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/marshal" + "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/fsimpl/sockfs" @@ -29,8 +31,6 @@ import ( "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" - "gvisor.dev/gvisor/tools/go_marshal/marshal" - "gvisor.dev/gvisor/tools/go_marshal/primitive" ) // SocketVFS2 encapsulates all the state needed to represent a network stack diff --git a/pkg/sentry/socket/socket.go b/pkg/sentry/socket/socket.go index 04b259d27..fd31479e5 100644 --- a/pkg/sentry/socket/socket.go +++ b/pkg/sentry/socket/socket.go @@ -25,6 +25,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/binary" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/marshal" "gvisor.dev/gvisor/pkg/sentry/device" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" @@ -35,7 +36,6 @@ import ( "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/usermem" - "gvisor.dev/gvisor/tools/go_marshal/marshal" ) // ControlMessages represents the union of unix control messages and tcpip diff --git a/pkg/sentry/socket/unix/BUILD b/pkg/sentry/socket/unix/BUILD index cb953e4dc..a89583dad 100644 --- a/pkg/sentry/socket/unix/BUILD +++ b/pkg/sentry/socket/unix/BUILD @@ -29,6 +29,7 @@ go_library( "//pkg/context", "//pkg/fspath", "//pkg/log", + "//pkg/marshal", "//pkg/refs", "//pkg/safemem", "//pkg/sentry/arch", @@ -49,6 +50,5 @@ go_library( "//pkg/tcpip", "//pkg/usermem", "//pkg/waiter", - "//tools/go_marshal/marshal", ], ) diff --git a/pkg/sentry/socket/unix/unix.go b/pkg/sentry/socket/unix/unix.go index 616530eb6..917055cea 100644 --- a/pkg/sentry/socket/unix/unix.go +++ b/pkg/sentry/socket/unix/unix.go @@ -24,6 +24,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" + "gvisor.dev/gvisor/pkg/marshal" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" @@ -39,7 +40,6 @@ import ( "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" - "gvisor.dev/gvisor/tools/go_marshal/marshal" ) // SocketOperations is a Unix socket. It is similar to a netstack socket, diff --git a/pkg/sentry/socket/unix/unix_vfs2.go b/pkg/sentry/socket/unix/unix_vfs2.go index e25c7e84a..3688f22d2 100644 --- a/pkg/sentry/socket/unix/unix_vfs2.go +++ b/pkg/sentry/socket/unix/unix_vfs2.go @@ -18,6 +18,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" + "gvisor.dev/gvisor/pkg/marshal" "gvisor.dev/gvisor/pkg/sentry/arch" fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/fsimpl/sockfs" @@ -32,7 +33,6 @@ import ( "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" - "gvisor.dev/gvisor/tools/go_marshal/marshal" ) // SocketVFS2 implements socket.SocketVFS2 (and by extension, diff --git a/pkg/sentry/syscalls/linux/BUILD b/pkg/sentry/syscalls/linux/BUILD index 4a9b04fd0..86f224c86 100644 --- a/pkg/sentry/syscalls/linux/BUILD +++ b/pkg/sentry/syscalls/linux/BUILD @@ -64,6 +64,8 @@ go_library( "//pkg/bpf", "//pkg/context", "//pkg/log", + "//pkg/marshal", + "//pkg/marshal/primitive", "//pkg/metric", "//pkg/rand", "//pkg/safemem", @@ -99,7 +101,5 @@ go_library( "//pkg/syserror", "//pkg/usermem", "//pkg/waiter", - "//tools/go_marshal/marshal", - "//tools/go_marshal/primitive", ], ) diff --git a/pkg/sentry/syscalls/linux/linux64.go b/pkg/sentry/syscalls/linux/linux64.go index da6bd85e1..5f26697d2 100644 --- a/pkg/sentry/syscalls/linux/linux64.go +++ b/pkg/sentry/syscalls/linux/linux64.go @@ -138,7 +138,7 @@ var AMD64 = &kernel.SyscallTable{ 83: syscalls.Supported("mkdir", Mkdir), 84: syscalls.Supported("rmdir", Rmdir), 85: syscalls.Supported("creat", Creat), - 86: syscalls.Supported("link", Link), + 86: syscalls.PartiallySupported("link", Link, "Limited support with Gofer. Link count and linked files may get out of sync because gVisor is not aware of external hardlinks.", nil), 87: syscalls.Supported("unlink", Unlink), 88: syscalls.Supported("symlink", Symlink), 89: syscalls.Supported("readlink", Readlink), @@ -317,7 +317,7 @@ var AMD64 = &kernel.SyscallTable{ 262: syscalls.Supported("fstatat", Fstatat), 263: syscalls.Supported("unlinkat", Unlinkat), 264: syscalls.Supported("renameat", Renameat), - 265: syscalls.Supported("linkat", Linkat), + 265: syscalls.PartiallySupported("linkat", Linkat, "See link(2).", nil), 266: syscalls.Supported("symlinkat", Symlinkat), 267: syscalls.Supported("readlinkat", Readlinkat), 268: syscalls.Supported("fchmodat", Fchmodat), diff --git a/pkg/sentry/syscalls/linux/sys_socket.go b/pkg/sentry/syscalls/linux/sys_socket.go index e4528d095..161d14ded 100644 --- a/pkg/sentry/syscalls/linux/sys_socket.go +++ b/pkg/sentry/syscalls/linux/sys_socket.go @@ -19,6 +19,8 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/marshal" + "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" @@ -29,8 +31,6 @@ import ( "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" - "gvisor.dev/gvisor/tools/go_marshal/marshal" - "gvisor.dev/gvisor/tools/go_marshal/primitive" ) // LINT.IfChange diff --git a/pkg/sentry/syscalls/linux/vfs2/BUILD b/pkg/sentry/syscalls/linux/vfs2/BUILD index 0030dee39..9ee766552 100644 --- a/pkg/sentry/syscalls/linux/vfs2/BUILD +++ b/pkg/sentry/syscalls/linux/vfs2/BUILD @@ -45,6 +45,8 @@ go_library( "//pkg/fspath", "//pkg/gohacks", "//pkg/log", + "//pkg/marshal", + "//pkg/marshal/primitive", "//pkg/sentry/arch", "//pkg/sentry/fs/lock", "//pkg/sentry/fsbridge", @@ -73,7 +75,5 @@ go_library( "//pkg/syserror", "//pkg/usermem", "//pkg/waiter", - "//tools/go_marshal/marshal", - "//tools/go_marshal/primitive", ], ) diff --git a/pkg/sentry/syscalls/linux/vfs2/socket.go b/pkg/sentry/syscalls/linux/vfs2/socket.go index a15dad29f..c899e3a72 100644 --- a/pkg/sentry/syscalls/linux/vfs2/socket.go +++ b/pkg/sentry/syscalls/linux/vfs2/socket.go @@ -19,6 +19,8 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/marshal" + "gvisor.dev/gvisor/pkg/marshal/primitive" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" @@ -30,8 +32,6 @@ import ( "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" - "gvisor.dev/gvisor/tools/go_marshal/marshal" - "gvisor.dev/gvisor/tools/go_marshal/primitive" ) // minListenBacklog is the minimum reasonable backlog for listening sockets. diff --git a/pkg/tcpip/header/ipv4.go b/pkg/tcpip/header/ipv4.go index 680eafd16..e8816c3f4 100644 --- a/pkg/tcpip/header/ipv4.go +++ b/pkg/tcpip/header/ipv4.go @@ -88,6 +88,16 @@ const ( // units, the header cannot exceed 15*4 = 60 bytes. IPv4MaximumHeaderSize = 60 + // IPv4MaximumPayloadSize is the maximum size of a valid IPv4 payload. + // + // Linux limits this to 65,515 octets (the max IP datagram size - the IPv4 + // header size). But RFC 791 section 3.2 discusses the design of the IPv4 + // fragment "allows 2**13 = 8192 fragments of 8 octets each for a total of + // 65,536 octets. Note that this is consistent with the the datagram total + // length field (of course, the header is counted in the total length and not + // in the fragments)." + IPv4MaximumPayloadSize = 65536 + // MinIPFragmentPayloadSize is the minimum number of payload bytes that // the first fragment must carry when an IPv4 packet is fragmented. MinIPFragmentPayloadSize = 8 diff --git a/pkg/tcpip/header/ipv6.go b/pkg/tcpip/header/ipv6.go index ea3823898..0761a1807 100644 --- a/pkg/tcpip/header/ipv6.go +++ b/pkg/tcpip/header/ipv6.go @@ -74,6 +74,10 @@ const ( // IPv6AddressSize is the size, in bytes, of an IPv6 address. IPv6AddressSize = 16 + // IPv6MaximumPayloadSize is the maximum size of a valid IPv6 payload per + // RFC 8200 Section 4.5. + IPv6MaximumPayloadSize = 65535 + // IPv6ProtocolNumber is IPv6's network protocol number. IPv6ProtocolNumber tcpip.NetworkProtocolNumber = 0x86dd diff --git a/pkg/tcpip/header/udp.go b/pkg/tcpip/header/udp.go index 9339d637f..98bdd29db 100644 --- a/pkg/tcpip/header/udp.go +++ b/pkg/tcpip/header/udp.go @@ -16,6 +16,7 @@ package header import ( "encoding/binary" + "math" "gvisor.dev/gvisor/pkg/tcpip" ) @@ -55,6 +56,10 @@ const ( // UDPMinimumSize is the minimum size of a valid UDP packet. UDPMinimumSize = 8 + // UDPMaximumSize is the maximum size of a valid UDP packet. The length field + // in the UDP header is 16 bits as per RFC 768. + UDPMaximumSize = math.MaxUint16 + // UDPProtocolNumber is UDP's transport protocol number. UDPProtocolNumber tcpip.TransportProtocolNumber = 17 ) diff --git a/pkg/tcpip/network/ipv4/ipv4.go b/pkg/tcpip/network/ipv4/ipv4.go index fa4ae2012..f4394749d 100644 --- a/pkg/tcpip/network/ipv4/ipv4.go +++ b/pkg/tcpip/network/ipv4/ipv4.go @@ -404,11 +404,15 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) { return } // The packet is a fragment, let's try to reassemble it. - last := h.FragmentOffset() + uint16(pkt.Data.Size()) - 1 - // Drop the packet if the fragmentOffset is incorrect. i.e the - // combination of fragmentOffset and pkt.Data.size() causes a - // wrap around resulting in last being less than the offset. - if last < h.FragmentOffset() { + start := h.FragmentOffset() + // Drop the fragment if the size of the reassembled payload would exceed the + // maximum payload size. + // + // Note that this addition doesn't overflow even on 32bit architecture + // because pkt.Data.Size() should not exceed 65535 (the max IP datagram + // size). Otherwise the packet would've been rejected as invalid before + // reaching here. + if int(start)+pkt.Data.Size() > header.IPv4MaximumPayloadSize { r.Stats().IP.MalformedPacketsReceived.Increment() r.Stats().IP.MalformedFragmentsReceived.Increment() return @@ -425,8 +429,8 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) { ID: uint32(h.ID()), Protocol: proto, }, - h.FragmentOffset(), - last, + start, + start+uint16(pkt.Data.Size())-1, h.More(), proto, pkt.Data, diff --git a/pkg/tcpip/network/ipv4/ipv4_test.go b/pkg/tcpip/network/ipv4/ipv4_test.go index 197e3bc51..2365b54f0 100644 --- a/pkg/tcpip/network/ipv4/ipv4_test.go +++ b/pkg/tcpip/network/ipv4/ipv4_test.go @@ -372,115 +372,308 @@ func TestFragmentationErrors(t *testing.T) { } func TestInvalidFragments(t *testing.T) { + const ( + nicID = 1 + linkAddr = tcpip.LinkAddress("\x0a\x0b\x0c\x0d\x0e\x0e") + addr1 = "\x0a\x00\x00\x01" + addr2 = "\x0a\x00\x00\x02" + tos = 0 + ident = 1 + ttl = 48 + protocol = 6 + ) + + payloadGen := func(payloadLen int) []byte { + payload := make([]byte, payloadLen) + for i := 0; i < len(payload); i++ { + payload[i] = 0x30 + } + return payload + } + + type fragmentData struct { + ipv4fields header.IPv4Fields + payload []byte + autoChecksum bool // if true, the Checksum field will be overwritten. + } + // These packets have both IHL and TotalLength set to 0. - testCases := []struct { + tests := []struct { name string - packets [][]byte + fragments []fragmentData wantMalformedIPPackets uint64 wantMalformedFragments uint64 }{ { - "ihl_totallen_zero_valid_frag_offset", - [][]byte{ - {0x40, 0x30, 0x00, 0x00, 0x6c, 0x74, 0x7d, 0x30, 0x30, 0x30, 0x30, 0x30, 0x39, 0x32, 0x39, 0x33, 0xff, 0xff, 0xff, 0xff, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30}, - }, - 1, - 0, - }, - { - "ihl_totallen_zero_invalid_frag_offset", - [][]byte{ - {0x40, 0x30, 0x00, 0x00, 0x6c, 0x74, 0x20, 0x00, 0x30, 0x30, 0x30, 0x30, 0x39, 0x32, 0x39, 0x33, 0xff, 0xff, 0xff, 0xff, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30}, + name: "IHL and TotalLength zero, FragmentOffset non-zero", + fragments: []fragmentData{ + { + ipv4fields: header.IPv4Fields{ + IHL: 0, + TOS: tos, + TotalLength: 0, + ID: ident, + Flags: header.IPv4FlagDontFragment | header.IPv4FlagMoreFragments, + FragmentOffset: 59776, + TTL: ttl, + Protocol: protocol, + SrcAddr: addr1, + DstAddr: addr2, + }, + payload: payloadGen(12), + autoChecksum: true, + }, }, - 1, - 0, + wantMalformedIPPackets: 1, + wantMalformedFragments: 0, }, { - // Total Length of 37(20 bytes IP header + 17 bytes of - // payload) - // Frag Offset of 0x1ffe = 8190*8 = 65520 - // Leading to the fragment end to be past 65535. - "ihl_totallen_valid_invalid_frag_offset_1", - [][]byte{ - {0x45, 0x30, 0x00, 0x25, 0x6c, 0x74, 0x1f, 0xfe, 0x30, 0x30, 0x30, 0x30, 0x39, 0x32, 0x39, 0x33, 0xff, 0xff, 0xff, 0xff, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30}, + name: "IHL and TotalLength zero, FragmentOffset zero", + fragments: []fragmentData{ + { + ipv4fields: header.IPv4Fields{ + IHL: 0, + TOS: tos, + TotalLength: 0, + ID: ident, + Flags: header.IPv4FlagMoreFragments, + FragmentOffset: 0, + TTL: ttl, + Protocol: protocol, + SrcAddr: addr1, + DstAddr: addr2, + }, + payload: payloadGen(12), + autoChecksum: true, + }, }, - 1, - 1, + wantMalformedIPPackets: 1, + wantMalformedFragments: 0, }, - // The following 3 tests were found by running a fuzzer and were - // triggering a panic in the IPv4 reassembler code. { - "ihl_less_than_ipv4_minimum_size_1", - [][]byte{ - {0x42, 0x30, 0x0, 0x30, 0x30, 0x40, 0x0, 0xf3, 0x30, 0x1, 0x30, 0x30, 0x73, 0x73, 0x69, 0x6e, 0xff, 0xff, 0xff, 0xff, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30}, - {0x42, 0x30, 0x0, 0x8, 0x30, 0x40, 0x20, 0x0, 0x30, 0x1, 0x30, 0x30, 0x73, 0x73, 0x69, 0x6e, 0xff, 0xff, 0xff, 0xff, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30}, + // Payload 17 octets and Fragment offset 65520 + // Leading to the fragment end to be past 65536. + name: "fragment ends past 65536", + fragments: []fragmentData{ + { + ipv4fields: header.IPv4Fields{ + IHL: header.IPv4MinimumSize, + TOS: tos, + TotalLength: header.IPv4MinimumSize + 17, + ID: ident, + Flags: 0, + FragmentOffset: 65520, + TTL: ttl, + Protocol: protocol, + SrcAddr: addr1, + DstAddr: addr2, + }, + payload: payloadGen(17), + autoChecksum: true, + }, }, - 2, - 0, + wantMalformedIPPackets: 1, + wantMalformedFragments: 1, }, { - "ihl_less_than_ipv4_minimum_size_2", - [][]byte{ - {0x42, 0x30, 0x0, 0x30, 0x30, 0x40, 0xb3, 0x12, 0x30, 0x6, 0x30, 0x30, 0x73, 0x73, 0x69, 0x6e, 0xff, 0xff, 0xff, 0xff, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30}, - {0x42, 0x30, 0x0, 0x8, 0x30, 0x40, 0x20, 0x0, 0x30, 0x6, 0x30, 0x30, 0x73, 0x73, 0x69, 0x6e, 0xff, 0xff, 0xff, 0xff, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30}, + // Payload 16 octets and fragment offset 65520 + // Leading to the fragment end to be exactly 65536. + name: "fragment ends exactly at 65536", + fragments: []fragmentData{ + { + ipv4fields: header.IPv4Fields{ + IHL: header.IPv4MinimumSize, + TOS: tos, + TotalLength: header.IPv4MinimumSize + 16, + ID: ident, + Flags: 0, + FragmentOffset: 65520, + TTL: ttl, + Protocol: protocol, + SrcAddr: addr1, + DstAddr: addr2, + }, + payload: payloadGen(16), + autoChecksum: true, + }, }, - 2, - 0, + wantMalformedIPPackets: 0, + wantMalformedFragments: 0, }, { - "ihl_less_than_ipv4_minimum_size_3", - [][]byte{ - {0x42, 0x30, 0x0, 0x30, 0x30, 0x40, 0xb3, 0x30, 0x30, 0x6, 0x30, 0x30, 0x73, 0x73, 0x69, 0x6e, 0xff, 0xff, 0xff, 0xff, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30}, - {0x42, 0x30, 0x0, 0x8, 0x30, 0x40, 0x20, 0x0, 0x30, 0x6, 0x30, 0x30, 0x73, 0x73, 0x69, 0x6e, 0xff, 0xff, 0xff, 0xff, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30}, + name: "IHL less than IPv4 minimum size", + fragments: []fragmentData{ + { + ipv4fields: header.IPv4Fields{ + IHL: header.IPv4MinimumSize - 12, + TOS: tos, + TotalLength: header.IPv4MinimumSize + 28, + ID: ident, + Flags: 0, + FragmentOffset: 1944, + TTL: ttl, + Protocol: protocol, + SrcAddr: addr1, + DstAddr: addr2, + }, + payload: payloadGen(28), + autoChecksum: true, + }, + { + ipv4fields: header.IPv4Fields{ + IHL: header.IPv4MinimumSize - 12, + TOS: tos, + TotalLength: header.IPv4MinimumSize - 12, + ID: ident, + Flags: header.IPv4FlagMoreFragments, + FragmentOffset: 0, + TTL: ttl, + Protocol: protocol, + SrcAddr: addr1, + DstAddr: addr2, + }, + payload: payloadGen(28), + autoChecksum: true, + }, }, - 2, - 0, + wantMalformedIPPackets: 2, + wantMalformedFragments: 0, }, { - "fragment_with_short_total_len_extra_payload", - [][]byte{ - {0x46, 0x30, 0x00, 0x30, 0x30, 0x40, 0x0e, 0x12, 0x30, 0x06, 0x30, 0x30, 0x73, 0x73, 0x69, 0x6e, 0xff, 0xff, 0xff, 0xff, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30}, - {0x46, 0x30, 0x00, 0x18, 0x30, 0x40, 0x20, 0x00, 0x30, 0x06, 0x30, 0x30, 0x73, 0x73, 0x69, 0x6e, 0xff, 0xff, 0xff, 0xff, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30}, + name: "fragment with short TotalLength and extra payload", + fragments: []fragmentData{ + { + ipv4fields: header.IPv4Fields{ + IHL: header.IPv4MinimumSize + 4, + TOS: tos, + TotalLength: header.IPv4MinimumSize + 28, + ID: ident, + Flags: 0, + FragmentOffset: 28816, + TTL: ttl, + Protocol: protocol, + SrcAddr: addr1, + DstAddr: addr2, + }, + payload: payloadGen(28), + autoChecksum: true, + }, + { + ipv4fields: header.IPv4Fields{ + IHL: header.IPv4MinimumSize + 4, + TOS: tos, + TotalLength: header.IPv4MinimumSize + 4, + ID: ident, + Flags: header.IPv4FlagMoreFragments, + FragmentOffset: 0, + TTL: ttl, + Protocol: protocol, + SrcAddr: addr1, + DstAddr: addr2, + }, + payload: payloadGen(28), + autoChecksum: true, + }, }, - 1, - 1, + wantMalformedIPPackets: 1, + wantMalformedFragments: 1, }, { - "multiple_fragments_with_more_fragments_set_to_false", - [][]byte{ - {0x45, 0x00, 0x00, 0x1c, 0x30, 0x40, 0x00, 0x10, 0x00, 0x06, 0x34, 0x69, 0x73, 0x73, 0x69, 0x6e, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x45, 0x00, 0x00, 0x1c, 0x30, 0x40, 0x00, 0x01, 0x61, 0x06, 0x34, 0x69, 0x73, 0x73, 0x69, 0x6e, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x45, 0x00, 0x00, 0x1c, 0x30, 0x40, 0x20, 0x00, 0x00, 0x06, 0x34, 0x1e, 0x73, 0x73, 0x69, 0x6e, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + name: "multiple fragments with More Fragments flag set to false", + fragments: []fragmentData{ + { + ipv4fields: header.IPv4Fields{ + IHL: header.IPv4MinimumSize, + TOS: tos, + TotalLength: header.IPv4MinimumSize + 8, + ID: ident, + Flags: 0, + FragmentOffset: 128, + TTL: ttl, + Protocol: protocol, + SrcAddr: addr1, + DstAddr: addr2, + }, + payload: payloadGen(8), + autoChecksum: true, + }, + { + ipv4fields: header.IPv4Fields{ + IHL: header.IPv4MinimumSize, + TOS: tos, + TotalLength: header.IPv4MinimumSize + 8, + ID: ident, + Flags: 0, + FragmentOffset: 8, + TTL: ttl, + Protocol: protocol, + SrcAddr: addr1, + DstAddr: addr2, + }, + payload: payloadGen(8), + autoChecksum: true, + }, + { + ipv4fields: header.IPv4Fields{ + IHL: header.IPv4MinimumSize, + TOS: tos, + TotalLength: header.IPv4MinimumSize + 8, + ID: ident, + Flags: header.IPv4FlagMoreFragments, + FragmentOffset: 0, + TTL: ttl, + Protocol: protocol, + SrcAddr: addr1, + DstAddr: addr2, + }, + payload: payloadGen(8), + autoChecksum: true, + }, }, - 1, - 1, + wantMalformedIPPackets: 1, + wantMalformedFragments: 1, }, } - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - const nicID tcpip.NICID = 42 + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + s := stack.New(stack.Options{ NetworkProtocols: []stack.NetworkProtocol{ ipv4.NewProtocol(), }, }) + e := channel.New(0, 1500, linkAddr) + if err := s.CreateNIC(nicID, e); err != nil { + t.Fatalf("CreateNIC(%d, _) = %s", nicID, err) + } + if err := s.AddAddress(nicID, ipv4.ProtocolNumber, addr2); err != nil { + t.Fatalf("AddAddress(%d, %d, %s) = %s", nicID, header.IPv4ProtocolNumber, addr2, err) + } + + for _, f := range test.fragments { + pktSize := header.IPv4MinimumSize + len(f.payload) + hdr := buffer.NewPrependable(pktSize) - var linkAddr = tcpip.LinkAddress([]byte{0x30, 0x30, 0x30, 0x30, 0x30, 0x30}) - var remoteLinkAddr = tcpip.LinkAddress([]byte{0x30, 0x30, 0x30, 0x30, 0x30, 0x31}) - ep := channel.New(10, 1500, linkAddr) - s.CreateNIC(nicID, sniffer.New(ep)) + ip := header.IPv4(hdr.Prepend(pktSize)) + ip.Encode(&f.ipv4fields) + copy(ip[header.IPv4MinimumSize:], f.payload) - for _, pkt := range tc.packets { - ep.InjectLinkAddr(header.IPv4ProtocolNumber, remoteLinkAddr, stack.NewPacketBuffer(stack.PacketBufferOptions{ - Data: buffer.NewVectorisedView(len(pkt), []buffer.View{pkt}), + if f.autoChecksum { + ip.SetChecksum(0) + ip.SetChecksum(^ip.CalculateChecksum()) + } + + vv := hdr.View().ToVectorisedView() + e.InjectInbound(header.IPv4ProtocolNumber, stack.NewPacketBuffer(stack.PacketBufferOptions{ + Data: vv, })) } - if got, want := s.Stats().IP.MalformedPacketsReceived.Value(), tc.wantMalformedIPPackets; got != want { + if got, want := s.Stats().IP.MalformedPacketsReceived.Value(), test.wantMalformedIPPackets; got != want { t.Errorf("incorrect Stats.IP.MalformedPacketsReceived, got: %d, want: %d", got, want) } - if got, want := s.Stats().IP.MalformedFragmentsReceived.Value(), tc.wantMalformedFragments; got != want { + if got, want := s.Stats().IP.MalformedFragmentsReceived.Value(), test.wantMalformedFragments; got != want { t.Errorf("incorrect Stats.IP.MalformedFragmentsReceived, got: %d, want: %d", got, want) } }) @@ -534,6 +727,9 @@ func TestReceiveFragments(t *testing.T) { // the fragment block size of 8 (RFC 791 section 3.1 page 14). ipv4Payload3Addr1ToAddr2 := udpGen(127, 3, addr1, addr2) udpPayload3Addr1ToAddr2 := ipv4Payload3Addr1ToAddr2[header.UDPMinimumSize:] + // Used to test the max reassembled payload length (65,535 octets). + ipv4Payload4Addr1ToAddr2 := udpGen(header.UDPMaximumSize-header.UDPMinimumSize, 4, addr1, addr2) + udpPayload4Addr1ToAddr2 := ipv4Payload4Addr1ToAddr2[header.UDPMinimumSize:] type fragmentData struct { srcAddr tcpip.Address @@ -827,6 +1023,28 @@ func TestReceiveFragments(t *testing.T) { }, expectedPayloads: nil, }, + { + name: "Two fragments reassembled into a maximum UDP packet", + fragments: []fragmentData{ + { + srcAddr: addr1, + dstAddr: addr2, + id: 1, + flags: header.IPv4FlagMoreFragments, + fragmentOffset: 0, + payload: ipv4Payload4Addr1ToAddr2[:65512], + }, + { + srcAddr: addr1, + dstAddr: addr2, + id: 1, + flags: 0, + fragmentOffset: 65512, + payload: ipv4Payload4Addr1ToAddr2[65512:], + }, + }, + expectedPayloads: [][]byte{udpPayload4Addr1ToAddr2}, + }, } for _, test := range tests { diff --git a/pkg/tcpip/network/ipv6/ipv6.go b/pkg/tcpip/network/ipv6/ipv6.go index af3cd91c6..e821a8bff 100644 --- a/pkg/tcpip/network/ipv6/ipv6.go +++ b/pkg/tcpip/network/ipv6/ipv6.go @@ -311,12 +311,10 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) { // The packet is a fragment, let's try to reassemble it. start := extHdr.FragmentOffset() * header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit - last := start + uint16(fragmentPayloadLen) - 1 - // Drop the packet if the fragmentOffset is incorrect. i.e the - // combination of fragmentOffset and pkt.Data.size() causes a - // wrap around resulting in last being less than the offset. - if last < start { + // Drop the fragment if the size of the reassembled payload would exceed + // the maximum payload size. + if int(start)+fragmentPayloadLen > header.IPv6MaximumPayloadSize { r.Stats().IP.MalformedPacketsReceived.Increment() r.Stats().IP.MalformedFragmentsReceived.Increment() return @@ -333,7 +331,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) { ID: extHdr.ID(), }, start, - last, + start+uint16(fragmentPayloadLen)-1, extHdr.More(), uint8(rawPayload.Identifier), rawPayload.Buf, diff --git a/pkg/tcpip/network/ipv6/ipv6_test.go b/pkg/tcpip/network/ipv6/ipv6_test.go index 54787198f..5f9822c49 100644 --- a/pkg/tcpip/network/ipv6/ipv6_test.go +++ b/pkg/tcpip/network/ipv6/ipv6_test.go @@ -15,6 +15,7 @@ package ipv6 import ( + "math" "testing" "github.com/google/go-cmp/cmp" @@ -687,6 +688,7 @@ func TestReceiveIPv6Fragments(t *testing.T) { // Used to test cases where the fragment blocks are not a multiple of // the fragment block size of 8 (RFC 8200 section 4.5). udpPayload3Length = 127 + udpPayload4Length = header.IPv6MaximumPayloadSize - header.UDPMinimumSize fragmentExtHdrLen = 8 // Note, not all routing extension headers will be 8 bytes but this test // uses 8 byte routing extension headers for most sub tests. @@ -731,6 +733,10 @@ func TestReceiveIPv6Fragments(t *testing.T) { udpPayload3Addr1ToAddr2 := udpPayload3Addr1ToAddr2Buf[:] ipv6Payload3Addr1ToAddr2 := udpGen(udpPayload3Addr1ToAddr2, 3, addr1, addr2) + var udpPayload4Addr1ToAddr2Buf [udpPayload4Length]byte + udpPayload4Addr1ToAddr2 := udpPayload4Addr1ToAddr2Buf[:] + ipv6Payload4Addr1ToAddr2 := udpGen(udpPayload4Addr1ToAddr2, 4, addr1, addr2) + tests := []struct { name string expectedPayload []byte @@ -1020,6 +1026,44 @@ func TestReceiveIPv6Fragments(t *testing.T) { expectedPayloads: nil, }, { + name: "Two fragments reassembled into a maximum UDP packet", + fragments: []fragmentData{ + { + srcAddr: addr1, + dstAddr: addr2, + nextHdr: fragmentExtHdrID, + data: buffer.NewVectorisedView( + fragmentExtHdrLen+65520, + []buffer.View{ + // Fragment extension header. + // + // Fragment offset = 0, More = true, ID = 1 + buffer.View([]byte{uint8(header.UDPProtocolNumber), 0, 0, 1, 0, 0, 0, 1}), + + ipv6Payload4Addr1ToAddr2[:65520], + }, + ), + }, + { + srcAddr: addr1, + dstAddr: addr2, + nextHdr: fragmentExtHdrID, + data: buffer.NewVectorisedView( + fragmentExtHdrLen+len(ipv6Payload4Addr1ToAddr2)-65520, + []buffer.View{ + // Fragment extension header. + // + // Fragment offset = 8190, More = false, ID = 1 + buffer.View([]byte{uint8(header.UDPProtocolNumber), 0, 255, 240, 0, 0, 0, 1}), + + ipv6Payload4Addr1ToAddr2[65520:], + }, + ), + }, + }, + expectedPayloads: [][]byte{udpPayload4Addr1ToAddr2}, + }, + { name: "Two fragments with per-fragment routing header with zero segments left", fragments: []fragmentData{ { @@ -1572,3 +1616,96 @@ func TestReceiveIPv6Fragments(t *testing.T) { }) } } + +func TestInvalidIPv6Fragments(t *testing.T) { + const ( + nicID = 1 + fragmentExtHdrLen = 8 + ) + + payloadGen := func(payloadLen int) []byte { + payload := make([]byte, payloadLen) + for i := 0; i < len(payload); i++ { + payload[i] = 0x30 + } + return payload + } + + tests := []struct { + name string + fragments []fragmentData + wantMalformedIPPackets uint64 + wantMalformedFragments uint64 + }{ + { + name: "fragments reassembled into a payload exceeding the max IPv6 payload size", + fragments: []fragmentData{ + { + srcAddr: addr1, + dstAddr: addr2, + nextHdr: fragmentExtHdrID, + data: buffer.NewVectorisedView( + fragmentExtHdrLen+(header.IPv6MaximumPayloadSize+1)-16, + []buffer.View{ + // Fragment extension header. + // Fragment offset = 8190, More = false, ID = 1 + buffer.View([]byte{uint8(header.UDPProtocolNumber), 0, + ((header.IPv6MaximumPayloadSize + 1) - 16) >> 8, + ((header.IPv6MaximumPayloadSize + 1) - 16) & math.MaxUint8, + 0, 0, 0, 1}), + // Payload length = 16 + payloadGen(16), + }, + ), + }, + }, + wantMalformedIPPackets: 1, + wantMalformedFragments: 1, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + s := stack.New(stack.Options{ + NetworkProtocols: []stack.NetworkProtocol{ + NewProtocol(), + }, + }) + e := channel.New(0, 1500, linkAddr1) + if err := s.CreateNIC(nicID, e); err != nil { + t.Fatalf("CreateNIC(%d, _) = %s", nicID, err) + } + if err := s.AddAddress(nicID, ProtocolNumber, addr2); err != nil { + t.Fatalf("AddAddress(%d, %d, %s) = %s", nicID, ProtocolNumber, addr2, err) + } + + for _, f := range test.fragments { + hdr := buffer.NewPrependable(header.IPv6MinimumSize) + + // Serialize IPv6 fixed header. + ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize)) + ip.Encode(&header.IPv6Fields{ + PayloadLength: uint16(f.data.Size()), + NextHeader: f.nextHdr, + HopLimit: 255, + SrcAddr: f.srcAddr, + DstAddr: f.dstAddr, + }) + + vv := hdr.View().ToVectorisedView() + vv.Append(f.data) + + e.InjectInbound(ProtocolNumber, stack.NewPacketBuffer(stack.PacketBufferOptions{ + Data: vv, + })) + } + + if got, want := s.Stats().IP.MalformedPacketsReceived.Value(), test.wantMalformedIPPackets; got != want { + t.Errorf("got Stats.IP.MalformedPacketsReceived = %d, want = %d", got, want) + } + if got, want := s.Stats().IP.MalformedFragmentsReceived.Value(), test.wantMalformedFragments; got != want { + t.Errorf("got Stats.IP.MalformedFragmentsReceived = %d, want = %d", got, want) + } + }) + } +} diff --git a/pkg/tcpip/stack/route.go b/pkg/tcpip/stack/route.go index c2eabde9e..2cbbf0de8 100644 --- a/pkg/tcpip/stack/route.go +++ b/pkg/tcpip/stack/route.go @@ -48,10 +48,6 @@ type Route struct { // Loop controls where WritePacket should send packets. Loop PacketLooping - - // directedBroadcast indicates whether this route is sending a directed - // broadcast packet. - directedBroadcast bool } // makeRoute initializes a new route. It takes ownership of the provided @@ -303,24 +299,27 @@ func (r *Route) Stack() *Stack { return r.ref.stack() } +func (r *Route) isV4Broadcast(addr tcpip.Address) bool { + if addr == header.IPv4Broadcast { + return true + } + + subnet := r.ref.addrWithPrefix().Subnet() + return subnet.IsBroadcast(addr) +} + // IsOutboundBroadcast returns true if the route is for an outbound broadcast // packet. func (r *Route) IsOutboundBroadcast() bool { // Only IPv4 has a notion of broadcast. - return r.directedBroadcast || r.RemoteAddress == header.IPv4Broadcast + return r.isV4Broadcast(r.RemoteAddress) } // IsInboundBroadcast returns true if the route is for an inbound broadcast // packet. func (r *Route) IsInboundBroadcast() bool { // Only IPv4 has a notion of broadcast. - if r.LocalAddress == header.IPv4Broadcast { - return true - } - - addr := r.ref.addrWithPrefix() - subnet := addr.Subnet() - return subnet.IsBroadcast(r.LocalAddress) + return r.isV4Broadcast(r.LocalAddress) } // ReverseRoute returns new route with given source and destination address. diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go index def8b0b43..6a683545d 100644 --- a/pkg/tcpip/stack/stack.go +++ b/pkg/tcpip/stack/stack.go @@ -1311,13 +1311,11 @@ func (s *Stack) FindRoute(id tcpip.NICID, localAddr, remoteAddr tcpip.Address, n } r := makeRoute(netProto, ref.address(), remoteAddr, nic.linkEP.LinkAddress(), ref, s.handleLocal && !nic.isLoopback(), multicastLoop && !nic.isLoopback()) - r.directedBroadcast = route.Destination.IsBroadcast(remoteAddr) - if len(route.Gateway) > 0 { if needRoute { r.NextHop = route.Gateway } - } else if r.directedBroadcast { + } else if subnet := ref.addrWithPrefix().Subnet(); subnet.IsBroadcast(remoteAddr) { r.RemoteLinkAddress = header.EthernetBroadcastAddress } diff --git a/pkg/tcpip/transport/tcp/rcv.go b/pkg/tcpip/transport/tcp/rcv.go index bc920a03b..cfd43b5e3 100644 --- a/pkg/tcpip/transport/tcp/rcv.go +++ b/pkg/tcpip/transport/tcp/rcv.go @@ -436,6 +436,13 @@ func (r *receiver) handleTimeWaitSegment(s *segment) (resetTimeWait bool, newSyn // Just silently drop any RST packets in TIME_WAIT. We do not support // TIME_WAIT assasination as a result we confirm w/ fix 1 as described // in https://tools.ietf.org/html/rfc1337#section-3. + // + // This behavior overrides RFC793 page 70 where we transition to CLOSED + // on receiving RST, which is also default Linux behavior. + // On Linux the RST can be ignored by setting sysctl net.ipv4.tcp_rfc1337. + // + // As we do not yet support PAWS, we are being conservative in ignoring + // RSTs by default. if s.flagIsSet(header.TCPFlagRst) { return false, false } diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go index 2828b2c01..b572c39db 100644 --- a/pkg/tcpip/transport/udp/endpoint.go +++ b/pkg/tcpip/transport/udp/endpoint.go @@ -139,7 +139,7 @@ type endpoint struct { // multicastMemberships that need to be remvoed when the endpoint is // closed. Protected by the mu mutex. - multicastMemberships []multicastMembership + multicastMemberships map[multicastMembership]struct{} // effectiveNetProtos contains the network protocols actually in use. In // most cases it will only contain "netProto", but in cases like IPv6 @@ -182,12 +182,13 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQue // TTL=1. // // Linux defaults to TTL=1. - multicastTTL: 1, - multicastLoop: true, - rcvBufSizeMax: 32 * 1024, - sndBufSizeMax: 32 * 1024, - state: StateInitial, - uniqueID: s.UniqueID(), + multicastTTL: 1, + multicastLoop: true, + rcvBufSizeMax: 32 * 1024, + sndBufSizeMax: 32 * 1024, + multicastMemberships: make(map[multicastMembership]struct{}), + state: StateInitial, + uniqueID: s.UniqueID(), } // Override with stack defaults. @@ -237,10 +238,10 @@ func (e *endpoint) Close() { e.boundPortFlags = ports.Flags{} } - for _, mem := range e.multicastMemberships { + for mem := range e.multicastMemberships { e.stack.LeaveGroup(e.NetProto, mem.nicID, mem.multicastAddr) } - e.multicastMemberships = nil + e.multicastMemberships = make(map[multicastMembership]struct{}) // Close the receive list and drain it. e.rcvMu.Lock() @@ -752,17 +753,15 @@ func (e *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) *tcpip.Error { e.mu.Lock() defer e.mu.Unlock() - for _, mem := range e.multicastMemberships { - if mem == memToInsert { - return tcpip.ErrPortInUse - } + if _, ok := e.multicastMemberships[memToInsert]; ok { + return tcpip.ErrPortInUse } if err := e.stack.JoinGroup(e.NetProto, nicID, v.MulticastAddr); err != nil { return err } - e.multicastMemberships = append(e.multicastMemberships, memToInsert) + e.multicastMemberships[memToInsert] = struct{}{} case *tcpip.RemoveMembershipOption: if !header.IsV4MulticastAddress(v.MulticastAddr) && !header.IsV6MulticastAddress(v.MulticastAddr) { @@ -786,18 +785,11 @@ func (e *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) *tcpip.Error { } memToRemove := multicastMembership{nicID: nicID, multicastAddr: v.MulticastAddr} - memToRemoveIndex := -1 e.mu.Lock() defer e.mu.Unlock() - for i, mem := range e.multicastMemberships { - if mem == memToRemove { - memToRemoveIndex = i - break - } - } - if memToRemoveIndex == -1 { + if _, ok := e.multicastMemberships[memToRemove]; !ok { return tcpip.ErrBadLocalAddress } @@ -805,8 +797,7 @@ func (e *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) *tcpip.Error { return err } - e.multicastMemberships[memToRemoveIndex] = e.multicastMemberships[len(e.multicastMemberships)-1] - e.multicastMemberships = e.multicastMemberships[:len(e.multicastMemberships)-1] + delete(e.multicastMemberships, memToRemove) case *tcpip.BindToDeviceOption: id := tcpip.NICID(*v) diff --git a/pkg/tcpip/transport/udp/endpoint_state.go b/pkg/tcpip/transport/udp/endpoint_state.go index 851e6b635..858c99a45 100644 --- a/pkg/tcpip/transport/udp/endpoint_state.go +++ b/pkg/tcpip/transport/udp/endpoint_state.go @@ -92,7 +92,7 @@ func (e *endpoint) Resume(s *stack.Stack) { e.stack = s - for _, m := range e.multicastMemberships { + for m := range e.multicastMemberships { if err := e.stack.JoinGroup(e.NetProto, m.nicID, m.multicastAddr); err != nil { panic(err) } diff --git a/pkg/tcpip/transport/udp/udp_test.go b/pkg/tcpip/transport/udp/udp_test.go index 0cbc045d8..d5881d183 100644 --- a/pkg/tcpip/transport/udp/udp_test.go +++ b/pkg/tcpip/transport/udp/udp_test.go @@ -2343,8 +2343,10 @@ func TestOutgoingSubnetBroadcast(t *testing.T) { NIC: nicID1, }, }, - remoteAddr: remNetSubnetBcast, - requiresBroadcastOpt: true, + remoteAddr: remNetSubnetBcast, + // TODO(gvisor.dev/issue/3938): Once we support marking a route as + // broadcast, this test should require the broadcast option to be set. + requiresBroadcastOpt: false, }, } diff --git a/scripts/packetdrill_tests.sh b/scripts/packetdrill_tests.sh index 1a8181ac8..cdb98c834 100755 --- a/scripts/packetdrill_tests.sh +++ b/scripts/packetdrill_tests.sh @@ -16,8 +16,8 @@ source $(dirname $0)/common.sh -make load-packetdrill +QUERY_RESULT=$(query 'attr(tags, manual, tests(//test/packetdrill/...))') install_runsc_for_test runsc-d -QUERY_RESULT=$(query "attr(tags, manual, tests(//test/packetdrill/...))") +make load-packetdrill test_runsc $QUERY_RESULT diff --git a/scripts/packetimpact_tests.sh b/scripts/packetimpact_tests.sh index 77fb84bc3..4878b72f4 100755 --- a/scripts/packetimpact_tests.sh +++ b/scripts/packetimpact_tests.sh @@ -16,8 +16,9 @@ source $(dirname $0)/common.sh -make load-packetimpact + +QUERY_RESULT=$(query 'attr(tags, packetimpact, tests(//test/packetimpact/...))') install_runsc_for_test runsc-d -QUERY_RESULT=$(query "attr(tags, packetimpact, tests(//test/packetimpact/...))") +make load-packetimpact test_runsc $QUERY_RESULT diff --git a/test/packetimpact/runner/dut.go b/test/packetimpact/runner/dut.go index be7b52f18..d4c486f9c 100644 --- a/test/packetimpact/runner/dut.go +++ b/test/packetimpact/runner/dut.go @@ -171,11 +171,8 @@ func TestWithDUT(ctx context.Context, t *testing.T, mkDevice func(*dockerutil.Co }}, } - // Add ctrlNet as eth1 and testNet as eth2. - const testNetDev = "eth2" - device := mkDevice(dut) - remoteIPv6, remoteMAC, dutDeviceID := device.Prepare(ctx, t, runOpts, ctrlNet, testNet, containerAddr) + remoteIPv6, remoteMAC, dutDeviceID, testNetDev := device.Prepare(ctx, t, runOpts, ctrlNet, testNet, containerAddr) // Create the Docker container for the testbench. testbench := dockerutil.MakeNativeContainer(ctx, logger("testbench")) @@ -282,8 +279,8 @@ func TestWithDUT(ctx context.Context, t *testing.T, mkDevice func(*dockerutil.Co // DUT describes how to setup/teardown the dut for packetimpact tests. type DUT interface { // Prepare prepares the dut, starts posix_server and returns the IPv6, MAC - // address and the interface ID for the testNet on DUT. - Prepare(ctx context.Context, t *testing.T, runOpts dockerutil.RunOpts, ctrlNet, testNet *dockerutil.Network, containerAddr net.IP) (net.IP, net.HardwareAddr, uint32) + // address, the interface ID, and the interface name for the testNet on DUT. + Prepare(ctx context.Context, t *testing.T, runOpts dockerutil.RunOpts, ctrlNet, testNet *dockerutil.Network, containerAddr net.IP) (net.IP, net.HardwareAddr, uint32, string) // Logs retrieves the logs from the dut. Logs(ctx context.Context) (string, error) } @@ -301,7 +298,7 @@ func NewDockerDUT(c *dockerutil.Container) DUT { } // Prepare implements DUT.Prepare. -func (dut *DockerDUT) Prepare(ctx context.Context, t *testing.T, runOpts dockerutil.RunOpts, ctrlNet, testNet *dockerutil.Network, containerAddr net.IP) (net.IP, net.HardwareAddr, uint32) { +func (dut *DockerDUT) Prepare(ctx context.Context, t *testing.T, runOpts dockerutil.RunOpts, ctrlNet, testNet *dockerutil.Network, containerAddr net.IP) (net.IP, net.HardwareAddr, uint32, string) { const containerPosixServerBinary = "/packetimpact/posix_server" dut.c.CopyFiles(&runOpts, "/packetimpact", "test/packetimpact/dut/posix_server") @@ -345,7 +342,9 @@ func (dut *DockerDUT) Prepare(ctx context.Context, t *testing.T, runOpts dockeru t.Fatalf("unable to set IPv6 address on container %s", dut.c.Name) } } - return remoteIPv6, dutDeviceInfo.MAC, dutDeviceInfo.ID + const testNetDev = "eth2" + + return remoteIPv6, dutDeviceInfo.MAC, dutDeviceInfo.ID, testNetDev } // Logs implements DUT.Logs. diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index e1ed0cc60..6dda05102 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -260,6 +260,18 @@ packetimpact_go_test( ) packetimpact_go_test( + name = "tcp_timewait_reset", + srcs = ["tcp_timewait_reset_test.go"], + # TODO(b/168523247): Fix netstack then remove the line below. + expect_netstack_failure = True, + deps = [ + "//pkg/tcpip/header", + "//test/packetimpact/testbench", + "@org_golang_x_sys//unix:go_default_library", + ], +) + +packetimpact_go_test( name = "icmpv6_param_problem", srcs = ["icmpv6_param_problem_test.go"], # TODO(b/153485026): Fix netstack then remove the line below. diff --git a/test/packetimpact/tests/tcp_timewait_reset_test.go b/test/packetimpact/tests/tcp_timewait_reset_test.go new file mode 100644 index 000000000..2f76a6531 --- /dev/null +++ b/test/packetimpact/tests/tcp_timewait_reset_test.go @@ -0,0 +1,68 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tcp_timewait_reset_test + +import ( + "flag" + "testing" + "time" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/test/packetimpact/testbench" +) + +func init() { + testbench.RegisterFlags(flag.CommandLine) +} + +// TestTimeWaitReset tests handling of RST when in TIME_WAIT state. +func TestTimeWaitReset(t *testing.T) { + dut := testbench.NewDUT(t) + defer dut.TearDown() + listenFD, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1 /*backlog*/) + defer dut.Close(t, listenFD) + conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) + defer conn.Close(t) + + conn.Connect(t) + acceptFD, _ := dut.Accept(t, listenFD) + + // Trigger active close. + dut.Close(t, acceptFD) + + _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagFin | header.TCPFlagAck)}, time.Second) + if err != nil { + t.Fatalf("expected a FIN: %s", err) + } + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) + // Send a FIN, DUT should transition to TIME_WAIT from FIN_WAIT2. + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagFin | header.TCPFlagAck)}) + if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second); err != nil { + t.Fatalf("expected an ACK for our FIN: %s", err) + } + + // Send a RST, the DUT should transition to CLOSED from TIME_WAIT. + // This is the default Linux behavior, it can be changed to ignore RSTs via + // sysctl net.ipv4.tcp_rfc1337. + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}) + + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) + // The DUT should reply with RST to our ACK as the state should have + // transitioned to CLOSED. + if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}, time.Second); err != nil { + t.Fatalf("expected a RST: %s", err) + } +} diff --git a/test/syscalls/linux/xattr.cc b/test/syscalls/linux/xattr.cc index 1a1010bb5..bd3f829c4 100644 --- a/test/syscalls/linux/xattr.cc +++ b/test/syscalls/linux/xattr.cc @@ -615,12 +615,18 @@ TEST_F(XattrTest, TrustedNamespaceWithCapSysAdmin) { SKIP_IF(IsRunningOnGvisor() && !ASSERT_NO_ERRNO_AND_VALUE(IsTmpfs(test_file_name_))); - // Setting/Getting in the trusted namespace requires CAP_SYS_ADMIN. - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); - const char* path = test_file_name_.c_str(); const char name[] = "trusted.test"; + // Writing to the trusted.* xattr namespace requires CAP_SYS_ADMIN in the root + // user namespace. There's no easy way to check that, other than trying the + // operation and seeing what happens. We'll call removexattr because it's + // simplest. + if (removexattr(path, name) < 0) { + SKIP_IF(errno == EPERM); + FAIL() << "unexpected errno from removexattr: " << errno; + } + // Set. char val = 'a'; size_t size = sizeof(val); diff --git a/tools/go_marshal/defs.bzl b/tools/go_marshal/defs.bzl index 323e33882..ba98f3599 100644 --- a/tools/go_marshal/defs.bzl +++ b/tools/go_marshal/defs.bzl @@ -56,7 +56,7 @@ marshal_deps = [ "//pkg/gohacks", "//pkg/safecopy", "//pkg/usermem", - "//tools/go_marshal/marshal", + "//pkg/marshal", ] # marshal_test_deps are required by test targets. diff --git a/tools/go_marshal/gomarshal/generator.go b/tools/go_marshal/gomarshal/generator.go index 19bcd4e6a..fd7cce8fa 100644 --- a/tools/go_marshal/gomarshal/generator.go +++ b/tools/go_marshal/gomarshal/generator.go @@ -107,7 +107,7 @@ func NewGenerator(srcs []string, out, outTest, pkg string, imports []string) (*G g.imports.add("gvisor.dev/gvisor/pkg/gohacks") g.imports.add("gvisor.dev/gvisor/pkg/safecopy") g.imports.add("gvisor.dev/gvisor/pkg/usermem") - g.imports.add("gvisor.dev/gvisor/tools/go_marshal/marshal") + g.imports.add("gvisor.dev/gvisor/pkg/marshal") return &g, nil } diff --git a/tools/go_marshal/test/BUILD b/tools/go_marshal/test/BUILD index 3d989823a..4b27773c2 100644 --- a/tools/go_marshal/test/BUILD +++ b/tools/go_marshal/test/BUILD @@ -35,10 +35,10 @@ go_test( srcs = ["marshal_test.go"], deps = [ ":test", + "//pkg/marshal", "//pkg/syserror", "//pkg/usermem", "//tools/go_marshal/analysis", - "//tools/go_marshal/marshal", "@com_github_google_go_cmp//cmp:go_default_library", ], ) diff --git a/tools/go_marshal/test/escape/BUILD b/tools/go_marshal/test/escape/BUILD index f74e6ffae..2981ef196 100644 --- a/tools/go_marshal/test/escape/BUILD +++ b/tools/go_marshal/test/escape/BUILD @@ -7,8 +7,8 @@ go_library( testonly = 1, srcs = ["escape.go"], deps = [ + "//pkg/marshal", "//pkg/usermem", - "//tools/go_marshal/marshal", "//tools/go_marshal/test", ], ) diff --git a/tools/go_marshal/test/escape/escape.go b/tools/go_marshal/test/escape/escape.go index 3a1a64e9c..ff23d87d1 100644 --- a/tools/go_marshal/test/escape/escape.go +++ b/tools/go_marshal/test/escape/escape.go @@ -15,8 +15,8 @@ package escape import ( + "gvisor.dev/gvisor/pkg/marshal" "gvisor.dev/gvisor/pkg/usermem" - "gvisor.dev/gvisor/tools/go_marshal/marshal" "gvisor.dev/gvisor/tools/go_marshal/test" ) diff --git a/tools/go_marshal/test/marshal_test.go b/tools/go_marshal/test/marshal_test.go index 16829ee45..7c3481ac8 100644 --- a/tools/go_marshal/test/marshal_test.go +++ b/tools/go_marshal/test/marshal_test.go @@ -27,10 +27,10 @@ import ( "unsafe" "github.com/google/go-cmp/cmp" + "gvisor.dev/gvisor/pkg/marshal" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/tools/go_marshal/analysis" - "gvisor.dev/gvisor/tools/go_marshal/marshal" "gvisor.dev/gvisor/tools/go_marshal/test" ) diff --git a/tools/nogo/nogo.go b/tools/nogo/nogo.go index 40e48540d..120fdcff5 100644 --- a/tools/nogo/nogo.go +++ b/tools/nogo/nogo.go @@ -202,29 +202,41 @@ func checkStdlib(config *stdlibConfig, ac map[*analysis.Analyzer]matcher) ([]str config.Srcs[i] = path.Clean(config.Srcs[i]) } - // Calculate the root directory. - longestPrefix := path.Dir(config.Srcs[0]) - for _, file := range config.Srcs[1:] { - for i := 0; i < len(file) && i < len(longestPrefix); i++ { - if file[i] != longestPrefix[i] { - // Truncate here; will stop the loop. - longestPrefix = longestPrefix[:i] - break - } + // Calculate the root source directory. This is always a directory + // named 'src', of which we simply take the first we find. This is a + // bit fragile, but works for all currently known Go source + // configurations. + // + // Note that there may be extra files outside of the root source + // directory; we simply ignore those. + rootSrcPrefix := "" + for _, file := range config.Srcs { + const src = "/src/" + i := strings.Index(file, src) + if i == -1 { + // Superfluous file. + continue } - } - if len(longestPrefix) > 0 && longestPrefix[len(longestPrefix)-1] != '/' { - longestPrefix += "/" + + // Index of first character after /src/. + i += len(src) + rootSrcPrefix = file[:i] + break } // Aggregate all files by directory. packages := make(map[string]*packageConfig) for _, file := range config.Srcs { + if !strings.HasPrefix(file, rootSrcPrefix) { + // Superflouous file. + continue + } + d := path.Dir(file) - if len(longestPrefix) >= len(d) { + if len(rootSrcPrefix) >= len(d) { continue // Not a file. } - pkg := path.Dir(file)[len(longestPrefix):] + pkg := d[len(rootSrcPrefix):] // Skip cmd packages and obvious test files: see above. if strings.HasPrefix(pkg, "cmd/") || strings.HasSuffix(file, "_test.go") { continue @@ -303,6 +315,11 @@ func checkStdlib(config *stdlibConfig, ac map[*analysis.Analyzer]matcher) ([]str checkOne(pkg) } + // Sanity check. + if len(stdlibFacts) == 0 { + return nil, nil, fmt.Errorf("no stdlib facts found: misconfiguration?") + } + // Write out all findings. factData, err := json.Marshal(stdlibFacts) if err != nil { |