summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorKevin Krakauer <krakauer@google.com>2020-02-05 14:43:11 -0800
committerKevin Krakauer <krakauer@google.com>2020-02-05 14:43:11 -0800
commitbf0ea204e9415a181c63ee10078cca753df14f7e (patch)
treef4d149ac2bf2ea900f8b2a0cdea0a7a2ea7e6d0e
parent29ad5762e4549d961f48c65292cfdeb7256524f6 (diff)
parentf2d3efca1deded31a2929ea77c0eecf476764660 (diff)
Merge branch 'master' into tcp-matchers-submit
-rw-r--r--kokoro/kythe/generate_xrefs.sh2
-rw-r--r--kokoro/packetdrill_tests.cfg9
-rw-r--r--kokoro/runtime_tests/go1.12.cfg6
-rw-r--r--kokoro/runtime_tests/java11.cfg6
-rw-r--r--kokoro/runtime_tests/nodejs12.4.0.cfg6
-rw-r--r--kokoro/runtime_tests/php7.3.6.cfg6
-rw-r--r--kokoro/runtime_tests/python3.7.3.cfg6
-rwxr-xr-xkokoro/runtime_tests/runtime_tests.sh (renamed from scripts/runtime_tests.sh)0
-rw-r--r--pkg/safecopy/safecopy_test.go88
-rw-r--r--pkg/safecopy/safecopy_unsafe.go98
-rw-r--r--pkg/safemem/seq_unsafe.go17
-rw-r--r--pkg/seccomp/seccomp.go20
-rw-r--r--pkg/seccomp/seccomp_rules.go6
-rw-r--r--pkg/seccomp/seccomp_test.go27
-rw-r--r--pkg/sentry/fs/fsutil/BUILD4
-rw-r--r--pkg/sentry/fs/fsutil/frame_ref_set.go13
-rw-r--r--pkg/sentry/fs/fsutil/inode_cached.go2
-rw-r--r--pkg/sentry/fs/g3doc/inotify.md16
-rw-r--r--pkg/sentry/fs/gofer/BUILD2
-rw-r--r--pkg/sentry/fs/gofer/cache_policy.go3
-rw-r--r--pkg/sentry/fs/gofer/fifo.go40
-rw-r--r--pkg/sentry/fs/gofer/gofer_test.go2
-rw-r--r--pkg/sentry/fs/gofer/path.go183
-rw-r--r--pkg/sentry/fs/gofer/session.go183
-rw-r--r--pkg/sentry/fs/gofer/session_state.go12
-rw-r--r--pkg/sentry/fs/gofer/socket.go8
-rw-r--r--pkg/sentry/fsimpl/devtmpfs/devtmpfs.go2
-rw-r--r--pkg/sentry/fsimpl/devtmpfs/devtmpfs_test.go2
-rw-r--r--pkg/sentry/fsimpl/gofer/BUILD55
-rw-r--r--pkg/sentry/fsimpl/gofer/directory.go190
-rw-r--r--pkg/sentry/fsimpl/gofer/filesystem.go1087
-rw-r--r--pkg/sentry/fsimpl/gofer/gofer.go1147
-rw-r--r--pkg/sentry/fsimpl/gofer/handle.go135
-rw-r--r--pkg/sentry/fsimpl/gofer/handle_unsafe.go66
-rw-r--r--pkg/sentry/fsimpl/gofer/p9file.go219
-rw-r--r--pkg/sentry/fsimpl/gofer/pagemath.go31
-rw-r--r--pkg/sentry/fsimpl/gofer/regular_file.go860
-rw-r--r--pkg/sentry/fsimpl/gofer/special_file.go159
-rw-r--r--pkg/sentry/fsimpl/gofer/symlink.go47
-rw-r--r--pkg/sentry/fsimpl/gofer/time.go75
-rw-r--r--pkg/sentry/fsimpl/testutil/testutil.go2
-rw-r--r--pkg/sentry/fsimpl/tmpfs/BUILD3
-rw-r--r--pkg/sentry/fsimpl/tmpfs/benchmark_test.go4
-rw-r--r--pkg/sentry/fsimpl/tmpfs/filesystem.go2
-rw-r--r--pkg/sentry/fsimpl/tmpfs/regular_file.go23
-rw-r--r--pkg/sentry/fsimpl/tmpfs/regular_file_test.go58
-rw-r--r--pkg/sentry/fsimpl/tmpfs/tmpfs.go43
-rw-r--r--pkg/sentry/inet/inet.go4
-rw-r--r--pkg/sentry/inet/test_stack.go6
-rw-r--r--pkg/sentry/mm/README.md8
-rw-r--r--pkg/sentry/socket/hostinet/socket.go23
-rw-r--r--pkg/sentry/socket/hostinet/stack.go5
-rw-r--r--pkg/sentry/socket/netfilter/netfilter.go3
-rw-r--r--pkg/sentry/socket/netlink/BUILD14
-rw-r--r--pkg/sentry/socket/netlink/message.go129
-rw-r--r--pkg/sentry/socket/netlink/message_test.go312
-rw-r--r--pkg/sentry/socket/netlink/provider.go2
-rw-r--r--pkg/sentry/socket/netlink/route/BUILD2
-rw-r--r--pkg/sentry/socket/netlink/route/protocol.go238
-rw-r--r--pkg/sentry/socket/netlink/socket.go54
-rw-r--r--pkg/sentry/socket/netlink/uevent/protocol.go2
-rw-r--r--pkg/sentry/socket/netstack/stack.go55
-rw-r--r--pkg/sentry/syscalls/linux/linux64_amd64.go27
-rw-r--r--pkg/sentry/syscalls/linux/linux64_arm64.go37
-rw-r--r--pkg/sentry/syscalls/linux/sys_timer.go2
-rw-r--r--pkg/sentry/vfs/lock/BUILD13
-rw-r--r--pkg/sentry/vfs/lock/lock.go72
-rw-r--r--pkg/sentry/vfs/mount.go3
-rw-r--r--pkg/tcpip/header/eth.go41
-rw-r--r--pkg/tcpip/header/eth_test.go34
-rw-r--r--pkg/tcpip/header/ipv6_test.go29
-rw-r--r--pkg/tcpip/iptables/types.go2
-rw-r--r--pkg/tcpip/link/channel/channel.go29
-rw-r--r--pkg/tcpip/network/arp/arp.go19
-rw-r--r--pkg/tcpip/network/ipv6/icmp.go26
-rw-r--r--pkg/tcpip/network/ipv6/icmp_test.go12
-rw-r--r--pkg/tcpip/stack/ndp.go17
-rw-r--r--pkg/tcpip/stack/ndp_test.go16
-rw-r--r--pkg/tcpip/stack/route.go4
-rw-r--r--pkg/tcpip/stack/stack.go9
-rw-r--r--pkg/tcpip/transport/tcp/accept.go4
-rw-r--r--pkg/tcpip/transport/tcp/connect.go4
-rw-r--r--pkg/tcpip/transport/tcp/testing/context/context.go6
-rw-r--r--runsc/BUILD1
-rw-r--r--runsc/boot/filter/config.go12
-rw-r--r--runsc/container/BUILD2
-rw-r--r--runsc/fsgofer/filter/config.go12
-rw-r--r--runsc/testutil/testutil.go66
-rwxr-xr-xscripts/common_build.sh12
-rwxr-xr-xscripts/packetdrill_tests.sh20
-rw-r--r--test/iptables/filter_input.go6
-rw-r--r--test/packetdrill/BUILD8
-rw-r--r--test/packetdrill/Dockerfile9
-rw-r--r--test/packetdrill/defs.bzl87
-rw-r--r--test/packetdrill/fin_wait2_timeout.pkt23
-rwxr-xr-xtest/packetdrill/packetdrill_setup.sh26
-rwxr-xr-xtest/packetdrill/packetdrill_test.sh225
-rw-r--r--test/runtimes/runner.go9
-rw-r--r--test/syscalls/BUILD10
-rw-r--r--test/syscalls/linux/32bit.cc6
-rw-r--r--test/syscalls/linux/BUILD738
-rw-r--r--test/syscalls/linux/bad.cc12
-rw-r--r--test/syscalls/linux/chroot.cc2
-rw-r--r--test/syscalls/linux/fork.cc3
-rw-r--r--test/syscalls/linux/getdents.cc11
-rw-r--r--test/syscalls/linux/ip_socket_test_util.cc27
-rw-r--r--test/syscalls/linux/ip_socket_test_util.h15
-rw-r--r--test/syscalls/linux/preadv2.cc2
-rw-r--r--test/syscalls/linux/proc.cc2
-rw-r--r--test/syscalls/linux/pwritev2.cc2
-rw-r--r--test/syscalls/linux/seccomp.cc5
-rw-r--r--test/syscalls/linux/socket_generic_stress.cc83
-rw-r--r--test/syscalls/linux/socket_inet_loopback.cc13
-rw-r--r--test/syscalls/linux/socket_netlink_route.cc296
-rw-r--r--test/syscalls/linux/socket_netlink_util.cc45
-rw-r--r--test/syscalls/linux/socket_netlink_util.h9
-rw-r--r--test/syscalls/linux/socket_test_util.cc101
-rw-r--r--test/syscalls/linux/socket_test_util.h6
-rw-r--r--test/syscalls/linux/stat.cc2
-rw-r--r--test/syscalls/linux/timers.cc18
-rw-r--r--test/syscalls/linux/xattr.cc124
-rw-r--r--test/syscalls/syscall_test_runner.go7
-rw-r--r--test/util/BUILD30
-rw-r--r--test/util/platform_util.cc5
-rw-r--r--test/util/signal_util.h14
-rw-r--r--test/util/test_util.cc2
-rw-r--r--tools/build/defs.bzl3
-rw-r--r--tools/build/tags.bzl36
-rw-r--r--tools/defs.bzl106
-rw-r--r--tools/go_marshal/gomarshal/BUILD1
-rw-r--r--tools/go_marshal/gomarshal/generator.go11
-rw-r--r--tools/go_stateify/BUILD1
-rw-r--r--tools/go_stateify/defs.bzl10
-rw-r--r--tools/go_stateify/main.go122
-rw-r--r--tools/images/BUILD4
-rw-r--r--tools/tags/BUILD11
-rw-r--r--tools/tags/tags.go89
137 files changed, 7622 insertions, 1118 deletions
diff --git a/kokoro/kythe/generate_xrefs.sh b/kokoro/kythe/generate_xrefs.sh
index 7a0fbb3cd..323b0f77b 100644
--- a/kokoro/kythe/generate_xrefs.sh
+++ b/kokoro/kythe/generate_xrefs.sh
@@ -23,7 +23,7 @@ bazel version
python3 -V
-readonly KYTHE_VERSION='v0.0.39'
+readonly KYTHE_VERSION='v0.0.41'
readonly WORKDIR="$(mktemp -d)"
readonly KYTHE_DIR="${WORKDIR}/kythe-${KYTHE_VERSION}"
if [[ -n "$KOKORO_GIT_COMMIT" ]]; then
diff --git a/kokoro/packetdrill_tests.cfg b/kokoro/packetdrill_tests.cfg
new file mode 100644
index 000000000..258d7deb4
--- /dev/null
+++ b/kokoro/packetdrill_tests.cfg
@@ -0,0 +1,9 @@
+build_file: "repo/scripts/packetdrill_tests.sh"
+
+action {
+ define_artifacts {
+ regex: "**/sponge_log.xml"
+ regex: "**/sponge_log.log"
+ regex: "**/outputs.zip"
+ }
+}
diff --git a/kokoro/runtime_tests/go1.12.cfg b/kokoro/runtime_tests/go1.12.cfg
new file mode 100644
index 000000000..024740ab2
--- /dev/null
+++ b/kokoro/runtime_tests/go1.12.cfg
@@ -0,0 +1,6 @@
+build_file: "github/kokoro/runtime_tests/runtime_tests.sh"
+
+env_vars {
+ key: "RUNTIME_TEST_NAME"
+ value: "go1.12"
+} \ No newline at end of file
diff --git a/kokoro/runtime_tests/java11.cfg b/kokoro/runtime_tests/java11.cfg
new file mode 100644
index 000000000..f01d26153
--- /dev/null
+++ b/kokoro/runtime_tests/java11.cfg
@@ -0,0 +1,6 @@
+build_file: "github/kokoro/runtime_tests/runtime_tests.sh"
+
+env_vars {
+ key: "RUNTIME_TEST_NAME"
+ value: "java11"
+} \ No newline at end of file
diff --git a/kokoro/runtime_tests/nodejs12.4.0.cfg b/kokoro/runtime_tests/nodejs12.4.0.cfg
new file mode 100644
index 000000000..d4861fb07
--- /dev/null
+++ b/kokoro/runtime_tests/nodejs12.4.0.cfg
@@ -0,0 +1,6 @@
+build_file: "github/kokoro/runtime_tests/runtime_tests.sh"
+
+env_vars {
+ key: "RUNTIME_TEST_NAME"
+ value: "nodejs12.4.0"
+} \ No newline at end of file
diff --git a/kokoro/runtime_tests/php7.3.6.cfg b/kokoro/runtime_tests/php7.3.6.cfg
new file mode 100644
index 000000000..b737ed9cb
--- /dev/null
+++ b/kokoro/runtime_tests/php7.3.6.cfg
@@ -0,0 +1,6 @@
+build_file: "github/kokoro/runtime_tests/runtime_tests.sh"
+
+env_vars {
+ key: "RUNTIME_TEST_NAME"
+ value: "php7.3.6"
+} \ No newline at end of file
diff --git a/kokoro/runtime_tests/python3.7.3.cfg b/kokoro/runtime_tests/python3.7.3.cfg
new file mode 100644
index 000000000..971fcba05
--- /dev/null
+++ b/kokoro/runtime_tests/python3.7.3.cfg
@@ -0,0 +1,6 @@
+build_file: "github/kokoro/runtime_tests/runtime_tests.sh"
+
+env_vars {
+ key: "RUNTIME_TEST_NAME"
+ value: "python3.7.3"
+} \ No newline at end of file
diff --git a/scripts/runtime_tests.sh b/kokoro/runtime_tests/runtime_tests.sh
index 9ee991e42..9ee991e42 100755
--- a/scripts/runtime_tests.sh
+++ b/kokoro/runtime_tests/runtime_tests.sh
diff --git a/pkg/safecopy/safecopy_test.go b/pkg/safecopy/safecopy_test.go
index 5818f7f9b..7f7f69d61 100644
--- a/pkg/safecopy/safecopy_test.go
+++ b/pkg/safecopy/safecopy_test.go
@@ -138,10 +138,14 @@ func TestSwapUint32Success(t *testing.T) {
func TestSwapUint32AlignmentError(t *testing.T) {
// Test that SwapUint32 returns an AlignmentError when passed an unaligned
// address.
- data := new(struct{ val uint64 })
- addr := uintptr(unsafe.Pointer(&data.val)) + 1
- want := AlignmentError{Addr: addr, Alignment: 4}
- if _, err := SwapUint32(unsafe.Pointer(addr), 1); err != want {
+ data := make([]byte, 8) // 2 * sizeof(uint32).
+ alignedIndex := uintptr(0)
+ if offset := uintptr(unsafe.Pointer(&data[0])) % 4; offset != 0 {
+ alignedIndex = 4 - offset
+ }
+ ptr := unsafe.Pointer(&data[alignedIndex+1])
+ want := AlignmentError{Addr: uintptr(ptr), Alignment: 4}
+ if _, err := SwapUint32(ptr, 1); err != want {
t.Errorf("Unexpected error: got %v, want %v", err, want)
}
}
@@ -171,10 +175,14 @@ func TestSwapUint64Success(t *testing.T) {
func TestSwapUint64AlignmentError(t *testing.T) {
// Test that SwapUint64 returns an AlignmentError when passed an unaligned
// address.
- data := new(struct{ val1, val2 uint64 })
- addr := uintptr(unsafe.Pointer(&data.val1)) + 1
- want := AlignmentError{Addr: addr, Alignment: 8}
- if _, err := SwapUint64(unsafe.Pointer(addr), 1); err != want {
+ data := make([]byte, 16) // 2 * sizeof(uint64).
+ alignedIndex := uintptr(0)
+ if offset := uintptr(unsafe.Pointer(&data[0])) % 8; offset != 0 {
+ alignedIndex = 8 - offset
+ }
+ ptr := unsafe.Pointer(&data[alignedIndex+1])
+ want := AlignmentError{Addr: uintptr(ptr), Alignment: 8}
+ if _, err := SwapUint64(ptr, 1); err != want {
t.Errorf("Unexpected error: got %v, want %v", err, want)
}
}
@@ -201,10 +209,14 @@ func TestCompareAndSwapUint32Success(t *testing.T) {
func TestCompareAndSwapUint32AlignmentError(t *testing.T) {
// Test that CompareAndSwapUint32 returns an AlignmentError when passed an
// unaligned address.
- data := new(struct{ val uint64 })
- addr := uintptr(unsafe.Pointer(&data.val)) + 1
- want := AlignmentError{Addr: addr, Alignment: 4}
- if _, err := CompareAndSwapUint32(unsafe.Pointer(addr), 0, 1); err != want {
+ data := make([]byte, 8) // 2 * sizeof(uint32).
+ alignedIndex := uintptr(0)
+ if offset := uintptr(unsafe.Pointer(&data[0])) % 4; offset != 0 {
+ alignedIndex = 4 - offset
+ }
+ ptr := unsafe.Pointer(&data[alignedIndex+1])
+ want := AlignmentError{Addr: uintptr(ptr), Alignment: 4}
+ if _, err := CompareAndSwapUint32(ptr, 0, 1); err != want {
t.Errorf("Unexpected error: got %v, want %v", err, want)
}
}
@@ -252,8 +264,8 @@ func TestCopyInSegvError(t *testing.T) {
for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ {
t.Run(fmt.Sprintf("starting copy %d bytes before SIGSEGV", bytesBeforeFault), func(t *testing.T) {
withSegvErrorTestMapping(t, func(mapping []byte) {
- secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize
- src := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault))
+ secondPage := uintptr(unsafe.Pointer(&mapping[pageSize]))
+ src := unsafe.Pointer(&mapping[pageSize-bytesBeforeFault])
dst := randBuf(pageSize)
n, err := CopyIn(dst, src)
if n != bytesBeforeFault {
@@ -276,8 +288,8 @@ func TestCopyInBusError(t *testing.T) {
for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ {
t.Run(fmt.Sprintf("starting copy %d bytes before SIGBUS", bytesBeforeFault), func(t *testing.T) {
withBusErrorTestMapping(t, func(mapping []byte) {
- secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize
- src := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault))
+ secondPage := uintptr(unsafe.Pointer(&mapping[pageSize]))
+ src := unsafe.Pointer(&mapping[pageSize-bytesBeforeFault])
dst := randBuf(pageSize)
n, err := CopyIn(dst, src)
if n != bytesBeforeFault {
@@ -300,8 +312,8 @@ func TestCopyOutSegvError(t *testing.T) {
for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ {
t.Run(fmt.Sprintf("starting copy %d bytes before SIGSEGV", bytesBeforeFault), func(t *testing.T) {
withSegvErrorTestMapping(t, func(mapping []byte) {
- secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize
- dst := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault))
+ secondPage := uintptr(unsafe.Pointer(&mapping[pageSize]))
+ dst := unsafe.Pointer(&mapping[pageSize-bytesBeforeFault])
src := randBuf(pageSize)
n, err := CopyOut(dst, src)
if n != bytesBeforeFault {
@@ -324,8 +336,8 @@ func TestCopyOutBusError(t *testing.T) {
for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ {
t.Run(fmt.Sprintf("starting copy %d bytes before SIGSEGV", bytesBeforeFault), func(t *testing.T) {
withBusErrorTestMapping(t, func(mapping []byte) {
- secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize
- dst := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault))
+ secondPage := uintptr(unsafe.Pointer(&mapping[pageSize]))
+ dst := unsafe.Pointer(&mapping[pageSize-bytesBeforeFault])
src := randBuf(pageSize)
n, err := CopyOut(dst, src)
if n != bytesBeforeFault {
@@ -348,8 +360,8 @@ func TestCopySourceSegvError(t *testing.T) {
for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ {
t.Run(fmt.Sprintf("starting copy %d bytes before SIGSEGV", bytesBeforeFault), func(t *testing.T) {
withSegvErrorTestMapping(t, func(mapping []byte) {
- secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize
- src := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault))
+ secondPage := uintptr(unsafe.Pointer(&mapping[pageSize]))
+ src := unsafe.Pointer(&mapping[pageSize-bytesBeforeFault])
dst := randBuf(pageSize)
n, err := Copy(unsafe.Pointer(&dst[0]), src, pageSize)
if n != uintptr(bytesBeforeFault) {
@@ -372,8 +384,8 @@ func TestCopySourceBusError(t *testing.T) {
for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ {
t.Run(fmt.Sprintf("starting copy %d bytes before SIGBUS", bytesBeforeFault), func(t *testing.T) {
withBusErrorTestMapping(t, func(mapping []byte) {
- secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize
- src := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault))
+ secondPage := uintptr(unsafe.Pointer(&mapping[pageSize]))
+ src := unsafe.Pointer(&mapping[pageSize-bytesBeforeFault])
dst := randBuf(pageSize)
n, err := Copy(unsafe.Pointer(&dst[0]), src, pageSize)
if n != uintptr(bytesBeforeFault) {
@@ -396,8 +408,8 @@ func TestCopyDestinationSegvError(t *testing.T) {
for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ {
t.Run(fmt.Sprintf("starting copy %d bytes before SIGSEGV", bytesBeforeFault), func(t *testing.T) {
withSegvErrorTestMapping(t, func(mapping []byte) {
- secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize
- dst := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault))
+ secondPage := uintptr(unsafe.Pointer(&mapping[pageSize]))
+ dst := unsafe.Pointer(&mapping[pageSize-bytesBeforeFault])
src := randBuf(pageSize)
n, err := Copy(dst, unsafe.Pointer(&src[0]), pageSize)
if n != uintptr(bytesBeforeFault) {
@@ -420,8 +432,8 @@ func TestCopyDestinationBusError(t *testing.T) {
for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ {
t.Run(fmt.Sprintf("starting copy %d bytes before SIGBUS", bytesBeforeFault), func(t *testing.T) {
withBusErrorTestMapping(t, func(mapping []byte) {
- secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize
- dst := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault))
+ secondPage := uintptr(unsafe.Pointer(&mapping[pageSize]))
+ dst := unsafe.Pointer(&mapping[pageSize-bytesBeforeFault])
src := randBuf(pageSize)
n, err := Copy(dst, unsafe.Pointer(&src[0]), pageSize)
if n != uintptr(bytesBeforeFault) {
@@ -444,8 +456,8 @@ func TestZeroOutSegvError(t *testing.T) {
for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ {
t.Run(fmt.Sprintf("starting write %d bytes before SIGSEGV", bytesBeforeFault), func(t *testing.T) {
withSegvErrorTestMapping(t, func(mapping []byte) {
- secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize
- dst := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault))
+ secondPage := uintptr(unsafe.Pointer(&mapping[pageSize]))
+ dst := unsafe.Pointer(&mapping[pageSize-bytesBeforeFault])
n, err := ZeroOut(dst, pageSize)
if n != uintptr(bytesBeforeFault) {
t.Errorf("Unexpected write length: got %v, want %v", n, bytesBeforeFault)
@@ -467,8 +479,8 @@ func TestZeroOutBusError(t *testing.T) {
for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ {
t.Run(fmt.Sprintf("starting write %d bytes before SIGBUS", bytesBeforeFault), func(t *testing.T) {
withBusErrorTestMapping(t, func(mapping []byte) {
- secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize
- dst := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault))
+ secondPage := uintptr(unsafe.Pointer(&mapping[pageSize]))
+ dst := unsafe.Pointer(&mapping[pageSize-bytesBeforeFault])
n, err := ZeroOut(dst, pageSize)
if n != uintptr(bytesBeforeFault) {
t.Errorf("Unexpected write length: got %v, want %v", n, bytesBeforeFault)
@@ -488,7 +500,7 @@ func TestSwapUint32SegvError(t *testing.T) {
// Test that SwapUint32 returns a SegvError when reaching a page that
// signals SIGSEGV.
withSegvErrorTestMapping(t, func(mapping []byte) {
- secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize
+ secondPage := uintptr(unsafe.Pointer(&mapping[pageSize]))
_, err := SwapUint32(unsafe.Pointer(secondPage), 1)
if want := (SegvError{secondPage}); err != want {
t.Errorf("Unexpected error: got %v, want %v", err, want)
@@ -500,7 +512,7 @@ func TestSwapUint32BusError(t *testing.T) {
// Test that SwapUint32 returns a BusError when reaching a page that
// signals SIGBUS.
withBusErrorTestMapping(t, func(mapping []byte) {
- secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize
+ secondPage := uintptr(unsafe.Pointer(&mapping[pageSize]))
_, err := SwapUint32(unsafe.Pointer(secondPage), 1)
if want := (BusError{secondPage}); err != want {
t.Errorf("Unexpected error: got %v, want %v", err, want)
@@ -512,7 +524,7 @@ func TestSwapUint64SegvError(t *testing.T) {
// Test that SwapUint64 returns a SegvError when reaching a page that
// signals SIGSEGV.
withSegvErrorTestMapping(t, func(mapping []byte) {
- secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize
+ secondPage := uintptr(unsafe.Pointer(&mapping[pageSize]))
_, err := SwapUint64(unsafe.Pointer(secondPage), 1)
if want := (SegvError{secondPage}); err != want {
t.Errorf("Unexpected error: got %v, want %v", err, want)
@@ -524,7 +536,7 @@ func TestSwapUint64BusError(t *testing.T) {
// Test that SwapUint64 returns a BusError when reaching a page that
// signals SIGBUS.
withBusErrorTestMapping(t, func(mapping []byte) {
- secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize
+ secondPage := uintptr(unsafe.Pointer(&mapping[pageSize]))
_, err := SwapUint64(unsafe.Pointer(secondPage), 1)
if want := (BusError{secondPage}); err != want {
t.Errorf("Unexpected error: got %v, want %v", err, want)
@@ -536,7 +548,7 @@ func TestCompareAndSwapUint32SegvError(t *testing.T) {
// Test that CompareAndSwapUint32 returns a SegvError when reaching a page
// that signals SIGSEGV.
withSegvErrorTestMapping(t, func(mapping []byte) {
- secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize
+ secondPage := uintptr(unsafe.Pointer(&mapping[pageSize]))
_, err := CompareAndSwapUint32(unsafe.Pointer(secondPage), 0, 1)
if want := (SegvError{secondPage}); err != want {
t.Errorf("Unexpected error: got %v, want %v", err, want)
@@ -548,7 +560,7 @@ func TestCompareAndSwapUint32BusError(t *testing.T) {
// Test that CompareAndSwapUint32 returns a BusError when reaching a page
// that signals SIGBUS.
withBusErrorTestMapping(t, func(mapping []byte) {
- secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize
+ secondPage := uintptr(unsafe.Pointer(&mapping[pageSize]))
_, err := CompareAndSwapUint32(unsafe.Pointer(secondPage), 0, 1)
if want := (BusError{secondPage}); err != want {
t.Errorf("Unexpected error: got %v, want %v", err, want)
diff --git a/pkg/safecopy/safecopy_unsafe.go b/pkg/safecopy/safecopy_unsafe.go
index eef028e68..41dd567f3 100644
--- a/pkg/safecopy/safecopy_unsafe.go
+++ b/pkg/safecopy/safecopy_unsafe.go
@@ -16,6 +16,7 @@ package safecopy
import (
"fmt"
+ "runtime"
"syscall"
"unsafe"
)
@@ -35,7 +36,7 @@ const maxRegisterSize = 16
// successfully copied.
//
//go:noescape
-func memcpy(dst, src unsafe.Pointer, n uintptr) (fault unsafe.Pointer, sig int32)
+func memcpy(dst, src uintptr, n uintptr) (fault uintptr, sig int32)
// memclr sets the n bytes following ptr to zeroes. If a SIGSEGV or SIGBUS
// signal is received during the write, it returns the address that caused the
@@ -47,7 +48,7 @@ func memcpy(dst, src unsafe.Pointer, n uintptr) (fault unsafe.Pointer, sig int32
// successfully written.
//
//go:noescape
-func memclr(ptr unsafe.Pointer, n uintptr) (fault unsafe.Pointer, sig int32)
+func memclr(ptr uintptr, n uintptr) (fault uintptr, sig int32)
// swapUint32 atomically stores new into *ptr and returns (the previous *ptr
// value, 0). If a SIGSEGV or SIGBUS signal is received during the swap, the
@@ -90,29 +91,35 @@ func loadUint32(ptr unsafe.Pointer) (val uint32, sig int32)
// CopyIn copies len(dst) bytes from src to dst. It returns the number of bytes
// copied and an error if SIGSEGV or SIGBUS is received while reading from src.
func CopyIn(dst []byte, src unsafe.Pointer) (int, error) {
+ n, err := copyIn(dst, uintptr(src))
+ runtime.KeepAlive(src)
+ return n, err
+}
+
+// copyIn is the underlying definition for CopyIn.
+func copyIn(dst []byte, src uintptr) (int, error) {
toCopy := uintptr(len(dst))
if len(dst) == 0 {
return 0, nil
}
- fault, sig := memcpy(unsafe.Pointer(&dst[0]), src, toCopy)
+ fault, sig := memcpy(uintptr(unsafe.Pointer(&dst[0])), src, toCopy)
if sig == 0 {
return len(dst), nil
}
- faultN, srcN := uintptr(fault), uintptr(src)
- if faultN < srcN || faultN >= srcN+toCopy {
- panic(fmt.Sprintf("CopyIn raised signal %d at %#x, which is outside source [%#x, %#x)", sig, faultN, srcN, srcN+toCopy))
+ if fault < src || fault >= src+toCopy {
+ panic(fmt.Sprintf("CopyIn raised signal %d at %#x, which is outside source [%#x, %#x)", sig, fault, src, src+toCopy))
}
// memcpy might have ended the copy up to maxRegisterSize bytes before
// fault, if an instruction caused a memory access that straddled two
// pages, and the second one faulted. Try to copy up to the fault.
var done int
- if faultN-srcN > maxRegisterSize {
- done = int(faultN - srcN - maxRegisterSize)
+ if fault-src > maxRegisterSize {
+ done = int(fault - src - maxRegisterSize)
}
- n, err := CopyIn(dst[done:int(faultN-srcN)], unsafe.Pointer(srcN+uintptr(done)))
+ n, err := copyIn(dst[done:int(fault-src)], src+uintptr(done))
done += n
if err != nil {
return done, err
@@ -124,29 +131,35 @@ func CopyIn(dst []byte, src unsafe.Pointer) (int, error) {
// bytes done and an error if SIGSEGV or SIGBUS is received while writing to
// dst.
func CopyOut(dst unsafe.Pointer, src []byte) (int, error) {
+ n, err := copyOut(uintptr(dst), src)
+ runtime.KeepAlive(dst)
+ return n, err
+}
+
+// copyOut is the underlying definition for CopyOut.
+func copyOut(dst uintptr, src []byte) (int, error) {
toCopy := uintptr(len(src))
if toCopy == 0 {
return 0, nil
}
- fault, sig := memcpy(dst, unsafe.Pointer(&src[0]), toCopy)
+ fault, sig := memcpy(dst, uintptr(unsafe.Pointer(&src[0])), toCopy)
if sig == 0 {
return len(src), nil
}
- faultN, dstN := uintptr(fault), uintptr(dst)
- if faultN < dstN || faultN >= dstN+toCopy {
- panic(fmt.Sprintf("CopyOut raised signal %d at %#x, which is outside destination [%#x, %#x)", sig, faultN, dstN, dstN+toCopy))
+ if fault < dst || fault >= dst+toCopy {
+ panic(fmt.Sprintf("CopyOut raised signal %d at %#x, which is outside destination [%#x, %#x)", sig, fault, dst, dst+toCopy))
}
// memcpy might have ended the copy up to maxRegisterSize bytes before
// fault, if an instruction caused a memory access that straddled two
// pages, and the second one faulted. Try to copy up to the fault.
var done int
- if faultN-dstN > maxRegisterSize {
- done = int(faultN - dstN - maxRegisterSize)
+ if fault-dst > maxRegisterSize {
+ done = int(fault - dst - maxRegisterSize)
}
- n, err := CopyOut(unsafe.Pointer(dstN+uintptr(done)), src[done:int(faultN-dstN)])
+ n, err := copyOut(dst+uintptr(done), src[done:int(fault-dst)])
done += n
if err != nil {
return done, err
@@ -161,6 +174,14 @@ func CopyOut(dst unsafe.Pointer, src []byte) (int, error) {
// Data is copied in order; if [src, src+toCopy) and [dst, dst+toCopy) overlap,
// the resulting contents of dst are unspecified.
func Copy(dst, src unsafe.Pointer, toCopy uintptr) (uintptr, error) {
+ n, err := copyN(uintptr(dst), uintptr(src), toCopy)
+ runtime.KeepAlive(dst)
+ runtime.KeepAlive(src)
+ return n, err
+}
+
+// copyN is the underlying definition for Copy.
+func copyN(dst, src uintptr, toCopy uintptr) (uintptr, error) {
if toCopy == 0 {
return 0, nil
}
@@ -171,17 +192,16 @@ func Copy(dst, src unsafe.Pointer, toCopy uintptr) (uintptr, error) {
}
// Did the fault occur while reading from src or writing to dst?
- faultN, srcN, dstN := uintptr(fault), uintptr(src), uintptr(dst)
faultAfterSrc := ^uintptr(0)
- if faultN >= srcN {
- faultAfterSrc = faultN - srcN
+ if fault >= src {
+ faultAfterSrc = fault - src
}
faultAfterDst := ^uintptr(0)
- if faultN >= dstN {
- faultAfterDst = faultN - dstN
+ if fault >= dst {
+ faultAfterDst = fault - dst
}
if faultAfterSrc >= toCopy && faultAfterDst >= toCopy {
- panic(fmt.Sprintf("Copy raised signal %d at %#x, which is outside source [%#x, %#x) and destination [%#x, %#x)", sig, faultN, srcN, srcN+toCopy, dstN, dstN+toCopy))
+ panic(fmt.Sprintf("Copy raised signal %d at %#x, which is outside source [%#x, %#x) and destination [%#x, %#x)", sig, fault, src, src+toCopy, dst, dst+toCopy))
}
faultedAfter := faultAfterSrc
if faultedAfter > faultAfterDst {
@@ -195,7 +215,7 @@ func Copy(dst, src unsafe.Pointer, toCopy uintptr) (uintptr, error) {
if faultedAfter > maxRegisterSize {
done = faultedAfter - maxRegisterSize
}
- n, err := Copy(unsafe.Pointer(dstN+done), unsafe.Pointer(srcN+done), faultedAfter-done)
+ n, err := copyN(dst+done, src+done, faultedAfter-done)
done += n
if err != nil {
return done, err
@@ -206,6 +226,13 @@ func Copy(dst, src unsafe.Pointer, toCopy uintptr) (uintptr, error) {
// ZeroOut writes toZero zero bytes to dst. It returns the number of bytes
// written and an error if SIGSEGV or SIGBUS is received while writing to dst.
func ZeroOut(dst unsafe.Pointer, toZero uintptr) (uintptr, error) {
+ n, err := zeroOut(uintptr(dst), toZero)
+ runtime.KeepAlive(dst)
+ return n, err
+}
+
+// zeroOut is the underlying definition for ZeroOut.
+func zeroOut(dst uintptr, toZero uintptr) (uintptr, error) {
if toZero == 0 {
return 0, nil
}
@@ -215,19 +242,18 @@ func ZeroOut(dst unsafe.Pointer, toZero uintptr) (uintptr, error) {
return toZero, nil
}
- faultN, dstN := uintptr(fault), uintptr(dst)
- if faultN < dstN || faultN >= dstN+toZero {
- panic(fmt.Sprintf("ZeroOut raised signal %d at %#x, which is outside destination [%#x, %#x)", sig, faultN, dstN, dstN+toZero))
+ if fault < dst || fault >= dst+toZero {
+ panic(fmt.Sprintf("ZeroOut raised signal %d at %#x, which is outside destination [%#x, %#x)", sig, fault, dst, dst+toZero))
}
// memclr might have ended the write up to maxRegisterSize bytes before
// fault, if an instruction caused a memory access that straddled two
// pages, and the second one faulted. Try to write up to the fault.
var done uintptr
- if faultN-dstN > maxRegisterSize {
- done = faultN - dstN - maxRegisterSize
+ if fault-dst > maxRegisterSize {
+ done = fault - dst - maxRegisterSize
}
- n, err := ZeroOut(unsafe.Pointer(dstN+done), faultN-dstN-done)
+ n, err := zeroOut(dst+done, fault-dst-done)
done += n
if err != nil {
return done, err
@@ -243,7 +269,7 @@ func SwapUint32(ptr unsafe.Pointer, new uint32) (uint32, error) {
return 0, AlignmentError{addr, 4}
}
old, sig := swapUint32(ptr, new)
- return old, errorFromFaultSignal(ptr, sig)
+ return old, errorFromFaultSignal(uintptr(ptr), sig)
}
// SwapUint64 is equivalent to sync/atomic.SwapUint64, except that it returns
@@ -254,7 +280,7 @@ func SwapUint64(ptr unsafe.Pointer, new uint64) (uint64, error) {
return 0, AlignmentError{addr, 8}
}
old, sig := swapUint64(ptr, new)
- return old, errorFromFaultSignal(ptr, sig)
+ return old, errorFromFaultSignal(uintptr(ptr), sig)
}
// CompareAndSwapUint32 is equivalent to atomicbitops.CompareAndSwapUint32,
@@ -265,7 +291,7 @@ func CompareAndSwapUint32(ptr unsafe.Pointer, old, new uint32) (uint32, error) {
return 0, AlignmentError{addr, 4}
}
prev, sig := compareAndSwapUint32(ptr, old, new)
- return prev, errorFromFaultSignal(ptr, sig)
+ return prev, errorFromFaultSignal(uintptr(ptr), sig)
}
// LoadUint32 is like sync/atomic.LoadUint32, but operates with user memory. It
@@ -277,17 +303,17 @@ func LoadUint32(ptr unsafe.Pointer) (uint32, error) {
return 0, AlignmentError{addr, 4}
}
val, sig := loadUint32(ptr)
- return val, errorFromFaultSignal(ptr, sig)
+ return val, errorFromFaultSignal(uintptr(ptr), sig)
}
-func errorFromFaultSignal(addr unsafe.Pointer, sig int32) error {
+func errorFromFaultSignal(addr uintptr, sig int32) error {
switch sig {
case 0:
return nil
case int32(syscall.SIGSEGV):
- return SegvError{uintptr(addr)}
+ return SegvError{addr}
case int32(syscall.SIGBUS):
- return BusError{uintptr(addr)}
+ return BusError{addr}
default:
panic(fmt.Sprintf("safecopy got unexpected signal %d at address %#x", sig, addr))
}
diff --git a/pkg/safemem/seq_unsafe.go b/pkg/safemem/seq_unsafe.go
index 354a95dde..dcdfc9600 100644
--- a/pkg/safemem/seq_unsafe.go
+++ b/pkg/safemem/seq_unsafe.go
@@ -18,6 +18,7 @@ import (
"bytes"
"fmt"
"reflect"
+ "syscall"
"unsafe"
)
@@ -297,3 +298,19 @@ func ZeroSeq(dsts BlockSeq) (uint64, error) {
}
return done, nil
}
+
+// IovecsFromBlockSeq returns a []syscall.Iovec representing seq.
+func IovecsFromBlockSeq(bs BlockSeq) []syscall.Iovec {
+ iovs := make([]syscall.Iovec, 0, bs.NumBlocks())
+ for ; !bs.IsEmpty(); bs = bs.Tail() {
+ b := bs.Head()
+ iovs = append(iovs, syscall.Iovec{
+ Base: &b.ToSlice()[0],
+ Len: uint64(b.Len()),
+ })
+ // We don't need to care about b.NeedSafecopy(), because the host
+ // kernel will handle such address ranges just fine (by returning
+ // EFAULT).
+ }
+ return iovs
+}
diff --git a/pkg/seccomp/seccomp.go b/pkg/seccomp/seccomp.go
index fc36efa23..55fd6967e 100644
--- a/pkg/seccomp/seccomp.go
+++ b/pkg/seccomp/seccomp.go
@@ -219,24 +219,36 @@ func addSyscallArgsCheck(p *bpf.ProgramBuilder, rules []Rule, action linux.BPFAc
switch a := arg.(type) {
case AllowAny:
case AllowValue:
+ dataOffsetLow := seccompDataOffsetArgLow(i)
+ dataOffsetHigh := seccompDataOffsetArgHigh(i)
+ if i == RuleIP {
+ dataOffsetLow = seccompDataOffsetIPLow
+ dataOffsetHigh = seccompDataOffsetIPHigh
+ }
high, low := uint32(a>>32), uint32(a)
// assert arg_low == low
- p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, seccompDataOffsetArgLow(i))
+ p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, dataOffsetLow)
p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, low, 0, ruleViolationLabel(ruleSetIdx, sysno, ruleidx))
// assert arg_high == high
- p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, seccompDataOffsetArgHigh(i))
+ p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, dataOffsetHigh)
p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, high, 0, ruleViolationLabel(ruleSetIdx, sysno, ruleidx))
labelled = true
case GreaterThan:
+ dataOffsetLow := seccompDataOffsetArgLow(i)
+ dataOffsetHigh := seccompDataOffsetArgHigh(i)
+ if i == RuleIP {
+ dataOffsetLow = seccompDataOffsetIPLow
+ dataOffsetHigh = seccompDataOffsetIPHigh
+ }
labelGood := fmt.Sprintf("gt%v", i)
high, low := uint32(a>>32), uint32(a)
// assert arg_high < high
- p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, seccompDataOffsetArgHigh(i))
+ p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, dataOffsetHigh)
p.AddJumpFalseLabel(bpf.Jmp|bpf.Jge|bpf.K, high, 0, ruleViolationLabel(ruleSetIdx, sysno, ruleidx))
// arg_high > high
p.AddJumpFalseLabel(bpf.Jmp|bpf.Jeq|bpf.K, high, 0, ruleLabel(ruleSetIdx, sysno, ruleidx, labelGood))
// arg_low < low
- p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, seccompDataOffsetArgLow(i))
+ p.AddStmt(bpf.Ld|bpf.Abs|bpf.W, dataOffsetLow)
p.AddJumpFalseLabel(bpf.Jmp|bpf.Jgt|bpf.K, low, 0, ruleViolationLabel(ruleSetIdx, sysno, ruleidx))
p.AddLabel(ruleLabel(ruleSetIdx, sysno, ruleidx, labelGood))
labelled = true
diff --git a/pkg/seccomp/seccomp_rules.go b/pkg/seccomp/seccomp_rules.go
index 84c841d7f..06308cd29 100644
--- a/pkg/seccomp/seccomp_rules.go
+++ b/pkg/seccomp/seccomp_rules.go
@@ -62,7 +62,11 @@ func (a AllowValue) String() (s string) {
// rule := Rule {
// AllowValue(linux.ARCH_GET_FS | linux.ARCH_SET_FS), // arg0
// }
-type Rule [6]interface{}
+type Rule [7]interface{} // 6 arguments + RIP
+
+// RuleIP indicates what rules in the Rule array have to be applied to
+// instruction pointer.
+const RuleIP = 6
func (r Rule) String() (s string) {
if len(r) == 0 {
diff --git a/pkg/seccomp/seccomp_test.go b/pkg/seccomp/seccomp_test.go
index abbee7051..da5a5e4b2 100644
--- a/pkg/seccomp/seccomp_test.go
+++ b/pkg/seccomp/seccomp_test.go
@@ -388,6 +388,33 @@ func TestBasic(t *testing.T) {
},
},
},
+ {
+ ruleSets: []RuleSet{
+ {
+ Rules: SyscallRules{
+ 1: []Rule{
+ {
+ RuleIP: AllowValue(0x7aabbccdd),
+ },
+ },
+ },
+ Action: linux.SECCOMP_RET_ALLOW,
+ },
+ },
+ defaultAction: linux.SECCOMP_RET_TRAP,
+ specs: []spec{
+ {
+ desc: "IP: Syscall instruction pointer allowed",
+ data: seccompData{nr: 1, arch: linux.AUDIT_ARCH_X86_64, args: [6]uint64{}, instructionPointer: 0x7aabbccdd},
+ want: linux.SECCOMP_RET_ALLOW,
+ },
+ {
+ desc: "IP: Syscall instruction pointer disallowed",
+ data: seccompData{nr: 1, arch: linux.AUDIT_ARCH_X86_64, args: [6]uint64{}, instructionPointer: 0x711223344},
+ want: linux.SECCOMP_RET_TRAP,
+ },
+ },
+ },
} {
instrs, err := BuildProgram(test.ruleSets, test.defaultAction)
if err != nil {
diff --git a/pkg/sentry/fs/fsutil/BUILD b/pkg/sentry/fs/fsutil/BUILD
index 4ab2a384f..789369220 100644
--- a/pkg/sentry/fs/fsutil/BUILD
+++ b/pkg/sentry/fs/fsutil/BUILD
@@ -28,13 +28,13 @@ go_template_instance(
"platform": "gvisor.dev/gvisor/pkg/sentry/platform",
},
package = "fsutil",
- prefix = "frameRef",
+ prefix = "FrameRef",
template = "//pkg/segment:generic_set",
types = {
"Key": "uint64",
"Range": "platform.FileRange",
"Value": "uint64",
- "Functions": "frameRefSetFunctions",
+ "Functions": "FrameRefSetFunctions",
},
)
diff --git a/pkg/sentry/fs/fsutil/frame_ref_set.go b/pkg/sentry/fs/fsutil/frame_ref_set.go
index dd63db32b..6564fd0c6 100644
--- a/pkg/sentry/fs/fsutil/frame_ref_set.go
+++ b/pkg/sentry/fs/fsutil/frame_ref_set.go
@@ -20,24 +20,25 @@ import (
"gvisor.dev/gvisor/pkg/sentry/platform"
)
-type frameRefSetFunctions struct{}
+// FrameRefSetFunctions implements segment.Functions for FrameRefSet.
+type FrameRefSetFunctions struct{}
// MinKey implements segment.Functions.MinKey.
-func (frameRefSetFunctions) MinKey() uint64 {
+func (FrameRefSetFunctions) MinKey() uint64 {
return 0
}
// MaxKey implements segment.Functions.MaxKey.
-func (frameRefSetFunctions) MaxKey() uint64 {
+func (FrameRefSetFunctions) MaxKey() uint64 {
return math.MaxUint64
}
// ClearValue implements segment.Functions.ClearValue.
-func (frameRefSetFunctions) ClearValue(val *uint64) {
+func (FrameRefSetFunctions) ClearValue(val *uint64) {
}
// Merge implements segment.Functions.Merge.
-func (frameRefSetFunctions) Merge(_ platform.FileRange, val1 uint64, _ platform.FileRange, val2 uint64) (uint64, bool) {
+func (FrameRefSetFunctions) Merge(_ platform.FileRange, val1 uint64, _ platform.FileRange, val2 uint64) (uint64, bool) {
if val1 != val2 {
return 0, false
}
@@ -45,6 +46,6 @@ func (frameRefSetFunctions) Merge(_ platform.FileRange, val1 uint64, _ platform.
}
// Split implements segment.Functions.Split.
-func (frameRefSetFunctions) Split(_ platform.FileRange, val uint64, _ uint64) (uint64, uint64) {
+func (FrameRefSetFunctions) Split(_ platform.FileRange, val uint64, _ uint64) (uint64, uint64) {
return val, val
}
diff --git a/pkg/sentry/fs/fsutil/inode_cached.go b/pkg/sentry/fs/fsutil/inode_cached.go
index 573b8586e..800c8b4e1 100644
--- a/pkg/sentry/fs/fsutil/inode_cached.go
+++ b/pkg/sentry/fs/fsutil/inode_cached.go
@@ -111,7 +111,7 @@ type CachingInodeOperations struct {
// refs tracks active references to data in the cache.
//
// refs is protected by dataMu.
- refs frameRefSet
+ refs FrameRefSet
}
// CachingInodeOperationsOptions configures a CachingInodeOperations.
diff --git a/pkg/sentry/fs/g3doc/inotify.md b/pkg/sentry/fs/g3doc/inotify.md
index 71a577d9d..85063d4e6 100644
--- a/pkg/sentry/fs/g3doc/inotify.md
+++ b/pkg/sentry/fs/g3doc/inotify.md
@@ -112,11 +112,11 @@ attempts to queue a new event, it is already holding `fs.Watches.mu`. If we used
`Inotify.mu` to also protect the event queue, this would violate the above lock
ordering.
-[dirent]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/fs/dirent.go
-[event]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/fs/inotify_event.go
-[fd_table]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/kernel/fd_table.go
-[inode]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/fs/inode.go
-[inode_watches]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/fs/inode_inotify.go
-[inotify]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/fs/inotify.go
-[syscall_dir]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/syscalls/linux/
-[watch]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/fs/inotify_watch.go
+[dirent]: https://github.com/google/gvisor/blob/master/pkg/sentry/fs/dirent.go
+[event]: https://github.com/google/gvisor/blob/master/pkg/sentry/fs/inotify_event.go
+[fd_table]: https://github.com/google/gvisor/blob/master/pkg/sentry/kernel/fd_table.go
+[inode]: https://github.com/google/gvisor/blob/master/pkg/sentry/fs/inode.go
+[inode_watches]: https://github.com/google/gvisor/blob/master/pkg/sentry/fs/inode_inotify.go
+[inotify]: https://github.com/google/gvisor/blob/master/pkg/sentry/fs/inotify.go
+[syscall_dir]: https://github.com/google/gvisor/blob/master/pkg/sentry/syscalls/linux/
+[watch]: https://github.com/google/gvisor/blob/master/pkg/sentry/fs/inotify_watch.go
diff --git a/pkg/sentry/fs/gofer/BUILD b/pkg/sentry/fs/gofer/BUILD
index 971d3718e..fea135eea 100644
--- a/pkg/sentry/fs/gofer/BUILD
+++ b/pkg/sentry/fs/gofer/BUILD
@@ -9,6 +9,7 @@ go_library(
"cache_policy.go",
"context_file.go",
"device.go",
+ "fifo.go",
"file.go",
"file_state.go",
"fs.go",
@@ -38,6 +39,7 @@ go_library(
"//pkg/sentry/fs/fsutil",
"//pkg/sentry/fs/host",
"//pkg/sentry/kernel/auth",
+ "//pkg/sentry/kernel/pipe",
"//pkg/sentry/kernel/time",
"//pkg/sentry/memmap",
"//pkg/sentry/socket/unix/transport",
diff --git a/pkg/sentry/fs/gofer/cache_policy.go b/pkg/sentry/fs/gofer/cache_policy.go
index ebea03c42..07a564e92 100644
--- a/pkg/sentry/fs/gofer/cache_policy.go
+++ b/pkg/sentry/fs/gofer/cache_policy.go
@@ -127,6 +127,9 @@ func (cp cachePolicy) revalidate(ctx context.Context, name string, parent, child
childIops, ok := child.InodeOperations.(*inodeOperations)
if !ok {
+ if _, ok := child.InodeOperations.(*fifo); ok {
+ return false
+ }
panic(fmt.Sprintf("revalidating inode operations of unknown type %T", child.InodeOperations))
}
parentIops, ok := parent.InodeOperations.(*inodeOperations)
diff --git a/pkg/sentry/fs/gofer/fifo.go b/pkg/sentry/fs/gofer/fifo.go
new file mode 100644
index 000000000..456557058
--- /dev/null
+++ b/pkg/sentry/fs/gofer/fifo.go
@@ -0,0 +1,40 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package gofer
+
+import (
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+)
+
+// +stateify savable
+type fifo struct {
+ fs.InodeOperations
+ fileIops *inodeOperations
+}
+
+var _ fs.InodeOperations = (*fifo)(nil)
+
+// Rename implements fs.InodeOperations. It forwards the call to the underlying
+// file inode to handle the file rename. Note that file key remains the same
+// after the rename to keep the endpoint mapping.
+func (i *fifo) Rename(ctx context.Context, inode *fs.Inode, oldParent *fs.Inode, oldName string, newParent *fs.Inode, newName string, replacement bool) error {
+ return i.fileIops.Rename(ctx, inode, oldParent, oldName, newParent, newName, replacement)
+}
+
+// StatFS implements fs.InodeOperations.
+func (i *fifo) StatFS(ctx context.Context) (fs.Info, error) {
+ return i.fileIops.StatFS(ctx)
+}
diff --git a/pkg/sentry/fs/gofer/gofer_test.go b/pkg/sentry/fs/gofer/gofer_test.go
index 0c2f89ae8..2df2fe889 100644
--- a/pkg/sentry/fs/gofer/gofer_test.go
+++ b/pkg/sentry/fs/gofer/gofer_test.go
@@ -61,7 +61,7 @@ func rootTest(t *testing.T, name string, cp cachePolicy, fn func(context.Context
ctx := contexttest.Context(t)
sattr, rootInodeOperations := newInodeOperations(ctx, s, contextFile{
file: rootFile,
- }, root.QID, p9.AttrMaskAll(), root.Attr, false /* socket */)
+ }, root.QID, p9.AttrMaskAll(), root.Attr)
m := fs.NewMountSource(ctx, s, &filesystem{}, fs.MountSourceFlags{})
rootInode := fs.NewInode(ctx, rootInodeOperations, m, sattr)
diff --git a/pkg/sentry/fs/gofer/path.go b/pkg/sentry/fs/gofer/path.go
index 0c1be05ef..a35c3a23d 100644
--- a/pkg/sentry/fs/gofer/path.go
+++ b/pkg/sentry/fs/gofer/path.go
@@ -23,14 +23,24 @@ import (
"gvisor.dev/gvisor/pkg/p9"
"gvisor.dev/gvisor/pkg/sentry/device"
"gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/pipe"
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
"gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/usermem"
)
// maxFilenameLen is the maximum length of a filename. This is dictated by 9P's
// encoding of strings, which uses 2 bytes for the length prefix.
const maxFilenameLen = (1 << 16) - 1
+func changeType(mode p9.FileMode, newType p9.FileMode) p9.FileMode {
+ if newType&^p9.FileModeMask != 0 {
+ panic(fmt.Sprintf("newType contained more bits than just file mode: %x", newType))
+ }
+ clear := mode &^ p9.FileModeMask
+ return clear | newType
+}
+
// Lookup loads an Inode at name into a Dirent based on the session's cache
// policy.
func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string) (*fs.Dirent, error) {
@@ -69,8 +79,25 @@ func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string
return nil, err
}
+ if i.session().overrides != nil {
+ // Check if file belongs to a internal named pipe. Note that it doesn't need
+ // to check for sockets because it's done in newInodeOperations below.
+ deviceKey := device.MultiDeviceKey{
+ Device: p9attr.RDev,
+ SecondaryDevice: i.session().connID,
+ Inode: qids[0].Path,
+ }
+ unlock := i.session().overrides.lock()
+ if pipeInode := i.session().overrides.getPipe(deviceKey); pipeInode != nil {
+ unlock()
+ pipeInode.IncRef()
+ return fs.NewDirent(ctx, pipeInode, name), nil
+ }
+ unlock()
+ }
+
// Construct the Inode operations.
- sattr, node := newInodeOperations(ctx, i.fileState.s, newFile, qids[0], mask, p9attr, false)
+ sattr, node := newInodeOperations(ctx, i.fileState.s, newFile, qids[0], mask, p9attr)
// Construct a positive Dirent.
return fs.NewDirent(ctx, fs.NewInode(ctx, node, dir.MountSource, sattr), name), nil
@@ -138,7 +165,7 @@ func (i *inodeOperations) Create(ctx context.Context, dir *fs.Inode, name string
qid := qids[0]
// Construct the InodeOperations.
- sattr, iops := newInodeOperations(ctx, i.fileState.s, unopened, qid, mask, p9attr, false)
+ sattr, iops := newInodeOperations(ctx, i.fileState.s, unopened, qid, mask, p9attr)
// Construct the positive Dirent.
d := fs.NewDirent(ctx, fs.NewInode(ctx, iops, dir.MountSource, sattr), name)
@@ -223,82 +250,115 @@ func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string,
return nil, syserror.ENAMETOOLONG
}
- if i.session().endpoints == nil {
+ if i.session().overrides == nil {
return nil, syscall.EOPNOTSUPP
}
- // Create replaces the directory fid with the newly created/opened
- // file, so clone this directory so it doesn't change out from under
- // this node.
- _, newFile, err := i.fileState.file.walk(ctx, nil)
+ // Stabilize the override map while creation is in progress.
+ unlock := i.session().overrides.lock()
+ defer unlock()
+
+ sattr, iops, err := i.createEndpointFile(ctx, dir, name, perm, p9.ModeSocket)
if err != nil {
return nil, err
}
- // We're not going to use newFile after return.
- defer newFile.close(ctx)
- // Stabilize the endpoint map while creation is in progress.
- unlock := i.session().endpoints.lock()
- defer unlock()
+ // Construct the positive Dirent.
+ childDir := fs.NewDirent(ctx, fs.NewInode(ctx, iops, dir.MountSource, sattr), name)
+ i.session().overrides.addBoundEndpoint(iops.fileState.key, childDir, ep)
+ return childDir, nil
+}
- // Create a regular file in the gofer and then mark it as a socket by
- // adding this inode key in the 'endpoints' map.
- owner := fs.FileOwnerFromContext(ctx)
- hostFile, err := newFile.create(ctx, name, p9.ReadWrite, p9.FileMode(perm.LinuxMode()), p9.UID(owner.UID), p9.GID(owner.GID))
- if err != nil {
- return nil, err
+// CreateFifo implements fs.InodeOperations.CreateFifo.
+func (i *inodeOperations) CreateFifo(ctx context.Context, dir *fs.Inode, name string, perm fs.FilePermissions) error {
+ if len(name) > maxFilenameLen {
+ return syserror.ENAMETOOLONG
}
- // We're not going to use this file.
- hostFile.Close()
- i.touchModificationAndStatusChangeTime(ctx, dir)
+ owner := fs.FileOwnerFromContext(ctx)
+ mode := p9.FileMode(perm.LinuxMode()) | p9.ModeNamedPipe
- // Get the attributes of the file to create inode key.
- qid, mask, attr, err := getattr(ctx, newFile)
- if err != nil {
- return nil, err
+ // N.B. FIFOs use major/minor numbers 0.
+ if _, err := i.fileState.file.mknod(ctx, name, mode, 0, 0, p9.UID(owner.UID), p9.GID(owner.GID)); err != nil {
+ if i.session().overrides == nil || err != syscall.EPERM {
+ return err
+ }
+ // If gofer doesn't support mknod, check if we can create an internal fifo.
+ return i.createInternalFifo(ctx, dir, name, owner, perm)
}
- key := device.MultiDeviceKey{
- Device: attr.RDev,
- SecondaryDevice: i.session().connID,
- Inode: qid.Path,
+ i.touchModificationAndStatusChangeTime(ctx, dir)
+ return nil
+}
+
+func (i *inodeOperations) createInternalFifo(ctx context.Context, dir *fs.Inode, name string, owner fs.FileOwner, perm fs.FilePermissions) error {
+ if i.session().overrides == nil {
+ return syserror.EPERM
}
- // Create child dirent.
+ // Stabilize the override map while creation is in progress.
+ unlock := i.session().overrides.lock()
+ defer unlock()
- // Get an unopened p9.File for the file we created so that it can be
- // cloned and re-opened multiple times after creation.
- _, unopened, err := i.fileState.file.walk(ctx, []string{name})
+ sattr, fileOps, err := i.createEndpointFile(ctx, dir, name, perm, p9.ModeNamedPipe)
if err != nil {
- return nil, err
+ return err
}
- // Construct the InodeOperations.
- sattr, iops := newInodeOperations(ctx, i.fileState.s, unopened, qid, mask, attr, true)
+ // First create a pipe.
+ p := pipe.NewPipe(true /* isNamed */, pipe.DefaultPipeSize, usermem.PageSize)
+
+ // Wrap the fileOps with our Fifo.
+ iops := &fifo{
+ InodeOperations: pipe.NewInodeOperations(ctx, perm, p),
+ fileIops: fileOps,
+ }
+ inode := fs.NewInode(ctx, iops, dir.MountSource, sattr)
// Construct the positive Dirent.
childDir := fs.NewDirent(ctx, fs.NewInode(ctx, iops, dir.MountSource, sattr), name)
- i.session().endpoints.add(key, childDir, ep)
- return childDir, nil
+ i.session().overrides.addPipe(fileOps.fileState.key, childDir, inode)
+ return nil
}
-// CreateFifo implements fs.InodeOperations.CreateFifo.
-func (i *inodeOperations) CreateFifo(ctx context.Context, dir *fs.Inode, name string, perm fs.FilePermissions) error {
- if len(name) > maxFilenameLen {
- return syserror.ENAMETOOLONG
+// Caller must hold Session.endpoint lock.
+func (i *inodeOperations) createEndpointFile(ctx context.Context, dir *fs.Inode, name string, perm fs.FilePermissions, fileType p9.FileMode) (fs.StableAttr, *inodeOperations, error) {
+ _, dirClone, err := i.fileState.file.walk(ctx, nil)
+ if err != nil {
+ return fs.StableAttr{}, nil, err
}
+ // We're not going to use dirClone after return.
+ defer dirClone.close(ctx)
+ // Create a regular file in the gofer and then mark it as a socket by
+ // adding this inode key in the 'overrides' map.
owner := fs.FileOwnerFromContext(ctx)
- mode := p9.FileMode(perm.LinuxMode()) | p9.ModeNamedPipe
-
- // N.B. FIFOs use major/minor numbers 0.
- if _, err := i.fileState.file.mknod(ctx, name, mode, 0, 0, p9.UID(owner.UID), p9.GID(owner.GID)); err != nil {
- return err
+ hostFile, err := dirClone.create(ctx, name, p9.ReadWrite, p9.FileMode(perm.LinuxMode()), p9.UID(owner.UID), p9.GID(owner.GID))
+ if err != nil {
+ return fs.StableAttr{}, nil, err
}
+ // We're not going to use this file.
+ hostFile.Close()
i.touchModificationAndStatusChangeTime(ctx, dir)
- return nil
+
+ // Get the attributes of the file to create inode key.
+ qid, mask, attr, err := getattr(ctx, dirClone)
+ if err != nil {
+ return fs.StableAttr{}, nil, err
+ }
+
+ // Get an unopened p9.File for the file we created so that it can be
+ // cloned and re-opened multiple times after creation.
+ _, unopened, err := i.fileState.file.walk(ctx, []string{name})
+ if err != nil {
+ return fs.StableAttr{}, nil, err
+ }
+
+ // Construct new inode with file type overridden.
+ attr.Mode = changeType(attr.Mode, fileType)
+ sattr, iops := newInodeOperations(ctx, i.fileState.s, unopened, qid, mask, attr)
+ return sattr, iops, nil
}
// Remove implements InodeOperations.Remove.
@@ -307,20 +367,23 @@ func (i *inodeOperations) Remove(ctx context.Context, dir *fs.Inode, name string
return syserror.ENAMETOOLONG
}
- var key device.MultiDeviceKey
- removeSocket := false
- if i.session().endpoints != nil {
- // Find out if file being deleted is a socket that needs to be
+ var key *device.MultiDeviceKey
+ if i.session().overrides != nil {
+ // Find out if file being deleted is a socket or pipe that needs to be
// removed from endpoint map.
if d, err := i.Lookup(ctx, dir, name); err == nil {
defer d.DecRef()
- if fs.IsSocket(d.Inode.StableAttr) {
- child := d.Inode.InodeOperations.(*inodeOperations)
- key = child.fileState.key
- removeSocket = true
- // Stabilize the endpoint map while deletion is in progress.
- unlock := i.session().endpoints.lock()
+ if fs.IsSocket(d.Inode.StableAttr) || fs.IsPipe(d.Inode.StableAttr) {
+ switch iops := d.Inode.InodeOperations.(type) {
+ case *inodeOperations:
+ key = &iops.fileState.key
+ case *fifo:
+ key = &iops.fileIops.fileState.key
+ }
+
+ // Stabilize the override map while deletion is in progress.
+ unlock := i.session().overrides.lock()
defer unlock()
}
}
@@ -329,8 +392,8 @@ func (i *inodeOperations) Remove(ctx context.Context, dir *fs.Inode, name string
if err := i.fileState.file.unlinkAt(ctx, name, 0); err != nil {
return err
}
- if removeSocket {
- i.session().endpoints.remove(key)
+ if key != nil {
+ i.session().overrides.remove(*key)
}
i.touchModificationAndStatusChangeTime(ctx, dir)
diff --git a/pkg/sentry/fs/gofer/session.go b/pkg/sentry/fs/gofer/session.go
index 498c4645a..f6b3ef178 100644
--- a/pkg/sentry/fs/gofer/session.go
+++ b/pkg/sentry/fs/gofer/session.go
@@ -33,60 +33,107 @@ import (
var DefaultDirentCacheSize uint64 = fs.DefaultDirentCacheSize
// +stateify savable
-type endpointMaps struct {
- // mu protexts the direntMap, the keyMap, and the pathMap below.
- mu sync.RWMutex `state:"nosave"`
+type overrideInfo struct {
+ dirent *fs.Dirent
+
+ // endpoint is set when dirent points to a socket. inode must not be set.
+ endpoint transport.BoundEndpoint
+
+ // inode is set when dirent points to a pipe. endpoint must not be set.
+ inode *fs.Inode
+}
- // direntMap links sockets to their dirents.
- // It is filled concurrently with the keyMap and is stored upon save.
- // Before saving, this map is used to populate the pathMap.
- direntMap map[transport.BoundEndpoint]*fs.Dirent
+func (l *overrideInfo) inodeType() fs.InodeType {
+ switch {
+ case l.endpoint != nil:
+ return fs.Socket
+ case l.inode != nil:
+ return fs.Pipe
+ }
+ panic("endpoint or node must be set")
+}
- // keyMap links MultiDeviceKeys (containing inode IDs) to their sockets.
+// +stateify savable
+type overrideMaps struct {
+ // mu protexts the keyMap, and the pathMap below.
+ mu sync.RWMutex `state:"nosave"`
+
+ // keyMap links MultiDeviceKeys (containing inode IDs) to their sockets/pipes.
// It is not stored during save because the inode ID may change upon restore.
- keyMap map[device.MultiDeviceKey]transport.BoundEndpoint `state:"nosave"`
+ keyMap map[device.MultiDeviceKey]*overrideInfo `state:"nosave"`
- // pathMap links the sockets to their paths.
+ // pathMap links the sockets/pipes to their paths.
// It is filled before saving from the direntMap and is stored upon save.
// Upon restore, this map is used to re-populate the keyMap.
- pathMap map[transport.BoundEndpoint]string
+ pathMap map[*overrideInfo]string
+}
+
+// addBoundEndpoint adds the bound endpoint to the map.
+// A reference is taken on the dirent argument.
+//
+// Precondition: maps must have been locked with 'lock'.
+func (e *overrideMaps) addBoundEndpoint(key device.MultiDeviceKey, d *fs.Dirent, ep transport.BoundEndpoint) {
+ d.IncRef()
+ e.keyMap[key] = &overrideInfo{dirent: d, endpoint: ep}
}
-// add adds the endpoint to the maps.
+// addPipe adds the pipe inode to the map.
// A reference is taken on the dirent argument.
//
// Precondition: maps must have been locked with 'lock'.
-func (e *endpointMaps) add(key device.MultiDeviceKey, d *fs.Dirent, ep transport.BoundEndpoint) {
- e.keyMap[key] = ep
+func (e *overrideMaps) addPipe(key device.MultiDeviceKey, d *fs.Dirent, inode *fs.Inode) {
d.IncRef()
- e.direntMap[ep] = d
+ e.keyMap[key] = &overrideInfo{dirent: d, inode: inode}
}
// remove deletes the key from the maps.
//
// Precondition: maps must have been locked with 'lock'.
-func (e *endpointMaps) remove(key device.MultiDeviceKey) {
- endpoint := e.get(key)
+func (e *overrideMaps) remove(key device.MultiDeviceKey) {
+ endpoint := e.keyMap[key]
delete(e.keyMap, key)
-
- d := e.direntMap[endpoint]
- d.DecRef()
- delete(e.direntMap, endpoint)
+ endpoint.dirent.DecRef()
}
// lock blocks other addition and removal operations from happening while
// the backing file is being created or deleted. Returns a function that unlocks
// the endpoint map.
-func (e *endpointMaps) lock() func() {
+func (e *overrideMaps) lock() func() {
e.mu.Lock()
return func() { e.mu.Unlock() }
}
-// get returns the endpoint mapped to the given key.
+// getBoundEndpoint returns the bound endpoint mapped to the given key.
//
-// Precondition: maps must have been locked for reading.
-func (e *endpointMaps) get(key device.MultiDeviceKey) transport.BoundEndpoint {
- return e.keyMap[key]
+// Precondition: maps must have been locked.
+func (e *overrideMaps) getBoundEndpoint(key device.MultiDeviceKey) transport.BoundEndpoint {
+ if v := e.keyMap[key]; v != nil {
+ return v.endpoint
+ }
+ return nil
+}
+
+// getPipe returns the pipe inode mapped to the given key.
+//
+// Precondition: maps must have been locked.
+func (e *overrideMaps) getPipe(key device.MultiDeviceKey) *fs.Inode {
+ if v := e.keyMap[key]; v != nil {
+ return v.inode
+ }
+ return nil
+}
+
+// getType returns the inode type if there is a corresponding endpoint for the
+// given key. Returns false otherwise.
+func (e *overrideMaps) getType(key device.MultiDeviceKey) (fs.InodeType, bool) {
+ e.mu.Lock()
+ v := e.keyMap[key]
+ e.mu.Unlock()
+
+ if v != nil {
+ return v.inodeType(), true
+ }
+ return 0, false
}
// session holds state for each 9p session established during sys_mount.
@@ -137,16 +184,16 @@ type session struct {
// mounter is the EUID/EGID that mounted this file system.
mounter fs.FileOwner `state:"wait"`
- // endpoints is used to map inodes that represent socket files to their
- // corresponding endpoint. Socket files are created as regular files in the
- // gofer and their presence in this map indicate that they should indeed be
- // socket files. This allows unix domain sockets to be used with paths that
- // belong to a gofer.
+ // overrides is used to map inodes that represent socket/pipes files to their
+ // corresponding endpoint/iops. These files are created as regular files in
+ // the gofer and their presence in this map indicate that they should indeed
+ // be socket/pipe files. This allows unix domain sockets and named pipes to
+ // be used with paths that belong to a gofer.
//
// TODO(gvisor.dev/issue/1200): there are few possible races with someone
// stat'ing the file and another deleting it concurrently, where the file
// will not be reported as socket file.
- endpoints *endpointMaps `state:"wait"`
+ overrides *overrideMaps `state:"wait"`
}
// Destroy tears down the session.
@@ -179,15 +226,21 @@ func (s *session) SaveInodeMapping(inode *fs.Inode, path string) {
// This is very unintuitive. We *CANNOT* trust the inode's StableAttrs,
// because overlay copyUp may have changed them out from under us.
// So much for "immutable".
- sattr := inode.InodeOperations.(*inodeOperations).fileState.sattr
- s.inodeMappings[sattr.InodeID] = path
+ switch iops := inode.InodeOperations.(type) {
+ case *inodeOperations:
+ s.inodeMappings[iops.fileState.sattr.InodeID] = path
+ case *fifo:
+ s.inodeMappings[iops.fileIops.fileState.sattr.InodeID] = path
+ default:
+ panic(fmt.Sprintf("Invalid type: %T", iops))
+ }
}
-// newInodeOperations creates a new 9p fs.InodeOperations backed by a p9.File and attributes
-// (p9.QID, p9.AttrMask, p9.Attr).
+// newInodeOperations creates a new 9p fs.InodeOperations backed by a p9.File
+// and attributes (p9.QID, p9.AttrMask, p9.Attr).
//
// Endpoints lock must not be held if socket == false.
-func newInodeOperations(ctx context.Context, s *session, file contextFile, qid p9.QID, valid p9.AttrMask, attr p9.Attr, socket bool) (fs.StableAttr, *inodeOperations) {
+func newInodeOperations(ctx context.Context, s *session, file contextFile, qid p9.QID, valid p9.AttrMask, attr p9.Attr) (fs.StableAttr, *inodeOperations) {
deviceKey := device.MultiDeviceKey{
Device: attr.RDev,
SecondaryDevice: s.connID,
@@ -201,17 +254,11 @@ func newInodeOperations(ctx context.Context, s *session, file contextFile, qid p
BlockSize: bsize(attr),
}
- if s.endpoints != nil {
- if socket {
- sattr.Type = fs.Socket
- } else {
- // If unix sockets are allowed on this filesystem, check if this file is
- // supposed to be a socket file.
- unlock := s.endpoints.lock()
- if s.endpoints.get(deviceKey) != nil {
- sattr.Type = fs.Socket
- }
- unlock()
+ if s.overrides != nil && sattr.Type == fs.RegularFile {
+ // If overrides are allowed on this filesystem, check if this file is
+ // supposed to be of a different type, e.g. socket.
+ if t, ok := s.overrides.getType(deviceKey); ok {
+ sattr.Type = t
}
}
@@ -267,7 +314,7 @@ func Root(ctx context.Context, dev string, filesystem fs.Filesystem, superBlockF
s.EnableLeakCheck("gofer.session")
if o.privateunixsocket {
- s.endpoints = newEndpointMaps()
+ s.overrides = newOverrideMaps()
}
// Construct the MountSource with the session and superBlockFlags.
@@ -305,26 +352,24 @@ func Root(ctx context.Context, dev string, filesystem fs.Filesystem, superBlockF
return nil, err
}
- sattr, iops := newInodeOperations(ctx, &s, s.attach, qid, valid, attr, false)
+ sattr, iops := newInodeOperations(ctx, &s, s.attach, qid, valid, attr)
return fs.NewInode(ctx, iops, m, sattr), nil
}
-// newEndpointMaps creates a new endpointMaps.
-func newEndpointMaps() *endpointMaps {
- return &endpointMaps{
- direntMap: make(map[transport.BoundEndpoint]*fs.Dirent),
- keyMap: make(map[device.MultiDeviceKey]transport.BoundEndpoint),
- pathMap: make(map[transport.BoundEndpoint]string),
+// newOverrideMaps creates a new overrideMaps.
+func newOverrideMaps() *overrideMaps {
+ return &overrideMaps{
+ keyMap: make(map[device.MultiDeviceKey]*overrideInfo),
+ pathMap: make(map[*overrideInfo]string),
}
}
-// fillKeyMap populates key and dirent maps upon restore from saved
-// pathmap.
+// fillKeyMap populates key and dirent maps upon restore from saved pathmap.
func (s *session) fillKeyMap(ctx context.Context) error {
- unlock := s.endpoints.lock()
+ unlock := s.overrides.lock()
defer unlock()
- for ep, dirPath := range s.endpoints.pathMap {
+ for ep, dirPath := range s.overrides.pathMap {
_, file, err := s.attach.walk(ctx, splitAbsolutePath(dirPath))
if err != nil {
return fmt.Errorf("error filling endpointmaps, failed to walk to %q: %v", dirPath, err)
@@ -341,25 +386,25 @@ func (s *session) fillKeyMap(ctx context.Context) error {
Inode: qid.Path,
}
- s.endpoints.keyMap[key] = ep
+ s.overrides.keyMap[key] = ep
}
return nil
}
-// fillPathMap populates paths for endpoints from dirents in direntMap
+// fillPathMap populates paths for overrides from dirents in direntMap
// before save.
func (s *session) fillPathMap() error {
- unlock := s.endpoints.lock()
+ unlock := s.overrides.lock()
defer unlock()
- for ep, dir := range s.endpoints.direntMap {
- mountRoot := dir.MountRoot()
+ for _, endpoint := range s.overrides.keyMap {
+ mountRoot := endpoint.dirent.MountRoot()
defer mountRoot.DecRef()
- dirPath, _ := dir.FullName(mountRoot)
+ dirPath, _ := endpoint.dirent.FullName(mountRoot)
if dirPath == "" {
return fmt.Errorf("error getting path from dirent")
}
- s.endpoints.pathMap[ep] = dirPath
+ s.overrides.pathMap[endpoint] = dirPath
}
return nil
}
@@ -368,7 +413,7 @@ func (s *session) fillPathMap() error {
func (s *session) restoreEndpointMaps(ctx context.Context) error {
// When restoring, only need to create the keyMap because the dirent and path
// maps got stored through the save.
- s.endpoints.keyMap = make(map[device.MultiDeviceKey]transport.BoundEndpoint)
+ s.overrides.keyMap = make(map[device.MultiDeviceKey]*overrideInfo)
if err := s.fillKeyMap(ctx); err != nil {
return fmt.Errorf("failed to insert sockets into endpoint map: %v", err)
}
@@ -376,6 +421,6 @@ func (s *session) restoreEndpointMaps(ctx context.Context) error {
// Re-create pathMap because it can no longer be trusted as socket paths can
// change while process continues to run. Empty pathMap will be re-filled upon
// next save.
- s.endpoints.pathMap = make(map[transport.BoundEndpoint]string)
+ s.overrides.pathMap = make(map[*overrideInfo]string)
return nil
}
diff --git a/pkg/sentry/fs/gofer/session_state.go b/pkg/sentry/fs/gofer/session_state.go
index 0285c5361..111da59f9 100644
--- a/pkg/sentry/fs/gofer/session_state.go
+++ b/pkg/sentry/fs/gofer/session_state.go
@@ -25,9 +25,9 @@ import (
// beforeSave is invoked by stateify.
func (s *session) beforeSave() {
- if s.endpoints != nil {
+ if s.overrides != nil {
if err := s.fillPathMap(); err != nil {
- panic("failed to save paths to endpoint map before saving" + err.Error())
+ panic("failed to save paths to override map before saving" + err.Error())
}
}
}
@@ -74,10 +74,10 @@ func (s *session) afterLoad() {
panic(fmt.Sprintf("new attach name %v, want %v", opts.aname, s.aname))
}
- // Check if endpointMaps exist when uds sockets are enabled
- // (only pathmap will actualy have been saved).
- if opts.privateunixsocket != (s.endpoints != nil) {
- panic(fmt.Sprintf("new privateunixsocket option %v, want %v", opts.privateunixsocket, s.endpoints != nil))
+ // Check if overrideMaps exist when uds sockets are enabled (only pathmaps
+ // will actually have been saved).
+ if opts.privateunixsocket != (s.overrides != nil) {
+ panic(fmt.Sprintf("new privateunixsocket option %v, want %v", opts.privateunixsocket, s.overrides != nil))
}
if args.Flags != s.superBlockFlags {
panic(fmt.Sprintf("new mount flags %v, want %v", args.Flags, s.superBlockFlags))
diff --git a/pkg/sentry/fs/gofer/socket.go b/pkg/sentry/fs/gofer/socket.go
index 376cfce2c..10ba2f5f0 100644
--- a/pkg/sentry/fs/gofer/socket.go
+++ b/pkg/sentry/fs/gofer/socket.go
@@ -32,15 +32,15 @@ func (i *inodeOperations) BoundEndpoint(inode *fs.Inode, path string) transport.
return nil
}
- if i.session().endpoints != nil {
- unlock := i.session().endpoints.lock()
+ if i.session().overrides != nil {
+ unlock := i.session().overrides.lock()
defer unlock()
- ep := i.session().endpoints.get(i.fileState.key)
+ ep := i.session().overrides.getBoundEndpoint(i.fileState.key)
if ep != nil {
return ep
}
- // Not found in endpoints map, it may be a gofer backed unix socket...
+ // Not found in overrides map, it may be a gofer backed unix socket...
}
inode.IncRef()
diff --git a/pkg/sentry/fsimpl/devtmpfs/devtmpfs.go b/pkg/sentry/fsimpl/devtmpfs/devtmpfs.go
index d36fa74fb..e03a0c665 100644
--- a/pkg/sentry/fsimpl/devtmpfs/devtmpfs.go
+++ b/pkg/sentry/fsimpl/devtmpfs/devtmpfs.go
@@ -86,7 +86,7 @@ func NewAccessor(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth
// Release must be called when a is no longer in use.
func (a *Accessor) Release() {
a.root.DecRef()
- a.mntns.DecRef(a.vfsObj)
+ a.mntns.DecRef()
}
// accessorContext implements context.Context by extending an existing
diff --git a/pkg/sentry/fsimpl/devtmpfs/devtmpfs_test.go b/pkg/sentry/fsimpl/devtmpfs/devtmpfs_test.go
index 82c58c900..73308a2b5 100644
--- a/pkg/sentry/fsimpl/devtmpfs/devtmpfs_test.go
+++ b/pkg/sentry/fsimpl/devtmpfs/devtmpfs_test.go
@@ -45,7 +45,7 @@ func TestDevtmpfs(t *testing.T) {
if err != nil {
t.Fatalf("failed to create tmpfs root mount: %v", err)
}
- defer mntns.DecRef(vfsObj)
+ defer mntns.DecRef()
root := mntns.Root()
defer root.DecRef()
devpop := vfs.PathOperation{
diff --git a/pkg/sentry/fsimpl/gofer/BUILD b/pkg/sentry/fsimpl/gofer/BUILD
new file mode 100644
index 000000000..4ba76a1e8
--- /dev/null
+++ b/pkg/sentry/fsimpl/gofer/BUILD
@@ -0,0 +1,55 @@
+load("//tools:defs.bzl", "go_library")
+load("//tools/go_generics:defs.bzl", "go_template_instance")
+
+licenses(["notice"])
+
+go_template_instance(
+ name = "dentry_list",
+ out = "dentry_list.go",
+ package = "gofer",
+ prefix = "dentry",
+ template = "//pkg/ilist:generic_list",
+ types = {
+ "Element": "*dentry",
+ "Linker": "*dentry",
+ },
+)
+
+go_library(
+ name = "gofer",
+ srcs = [
+ "dentry_list.go",
+ "directory.go",
+ "filesystem.go",
+ "gofer.go",
+ "handle.go",
+ "handle_unsafe.go",
+ "p9file.go",
+ "pagemath.go",
+ "regular_file.go",
+ "special_file.go",
+ "symlink.go",
+ "time.go",
+ ],
+ visibility = ["//pkg/sentry:internal"],
+ deps = [
+ "//pkg/abi/linux",
+ "//pkg/context",
+ "//pkg/fd",
+ "//pkg/fspath",
+ "//pkg/log",
+ "//pkg/p9",
+ "//pkg/safemem",
+ "//pkg/sentry/fs/fsutil",
+ "//pkg/sentry/kernel/auth",
+ "//pkg/sentry/kernel/time",
+ "//pkg/sentry/memmap",
+ "//pkg/sentry/pgalloc",
+ "//pkg/sentry/platform",
+ "//pkg/sentry/usage",
+ "//pkg/sentry/vfs",
+ "//pkg/syserror",
+ "//pkg/unet",
+ "//pkg/usermem",
+ ],
+)
diff --git a/pkg/sentry/fsimpl/gofer/directory.go b/pkg/sentry/fsimpl/gofer/directory.go
new file mode 100644
index 000000000..baa2cdd8e
--- /dev/null
+++ b/pkg/sentry/fsimpl/gofer/directory.go
@@ -0,0 +1,190 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package gofer
+
+import (
+ "sync"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/p9"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/syserror"
+)
+
+func (d *dentry) isDir() bool {
+ return d.fileType() == linux.S_IFDIR
+}
+
+// Preconditions: d.dirMu must be locked. d.isDir(). fs.opts.interop !=
+// InteropModeShared.
+func (d *dentry) cacheNegativeChildLocked(name string) {
+ if d.negativeChildren == nil {
+ d.negativeChildren = make(map[string]struct{})
+ }
+ d.negativeChildren[name] = struct{}{}
+}
+
+type directoryFD struct {
+ fileDescription
+ vfs.DirectoryFileDescriptionDefaultImpl
+
+ mu sync.Mutex
+ off int64
+ dirents []vfs.Dirent
+}
+
+// Release implements vfs.FileDescriptionImpl.Release.
+func (fd *directoryFD) Release() {
+}
+
+// IterDirents implements vfs.FileDescriptionImpl.IterDirents.
+func (fd *directoryFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error {
+ fd.mu.Lock()
+ defer fd.mu.Unlock()
+
+ if fd.dirents == nil {
+ ds, err := fd.dentry().getDirents(ctx)
+ if err != nil {
+ return err
+ }
+ fd.dirents = ds
+ }
+
+ for fd.off < int64(len(fd.dirents)) {
+ if !cb.Handle(fd.dirents[fd.off]) {
+ return nil
+ }
+ fd.off++
+ }
+ return nil
+}
+
+// Preconditions: d.isDir(). There exists at least one directoryFD representing d.
+func (d *dentry) getDirents(ctx context.Context) ([]vfs.Dirent, error) {
+ // 9P2000.L's readdir does not specify behavior in the presence of
+ // concurrent mutation of an iterated directory, so implementations may
+ // duplicate or omit entries in this case, which violates POSIX semantics.
+ // Thus we read all directory entries while holding d.dirMu to exclude
+ // directory mutations. (Note that it is impossible for the client to
+ // exclude concurrent mutation from other remote filesystem users. Since
+ // there is no way to detect if the server has incorrectly omitted
+ // directory entries, we simply assume that the server is well-behaved
+ // under InteropModeShared.) This is inconsistent with Linux (which appears
+ // to assume that directory fids have the correct semantics, and translates
+ // struct file_operations::readdir calls directly to readdir RPCs), but is
+ // consistent with VFS1.
+
+ d.fs.renameMu.RLock()
+ defer d.fs.renameMu.RUnlock()
+ d.dirMu.Lock()
+ defer d.dirMu.Unlock()
+ if d.dirents != nil {
+ return d.dirents, nil
+ }
+
+ // It's not clear if 9P2000.L's readdir is expected to return "." and "..",
+ // so we generate them here.
+ parent := d.vfsd.ParentOrSelf().Impl().(*dentry)
+ dirents := []vfs.Dirent{
+ {
+ Name: ".",
+ Type: linux.DT_DIR,
+ Ino: d.ino,
+ NextOff: 1,
+ },
+ {
+ Name: "..",
+ Type: uint8(atomic.LoadUint32(&parent.mode) >> 12),
+ Ino: parent.ino,
+ NextOff: 2,
+ },
+ }
+ off := uint64(0)
+ const count = 64 * 1024 // for consistency with the vfs1 client
+ d.handleMu.RLock()
+ defer d.handleMu.RUnlock()
+ if !d.handleReadable {
+ // This should not be possible because a readable handle should have
+ // been opened when the calling directoryFD was opened.
+ panic("gofer.dentry.getDirents called without a readable handle")
+ }
+ for {
+ p9ds, err := d.handle.file.readdir(ctx, off, count)
+ if err != nil {
+ return nil, err
+ }
+ if len(p9ds) == 0 {
+ // Cache dirents for future directoryFDs if permitted.
+ if d.fs.opts.interop != InteropModeShared {
+ d.dirents = dirents
+ }
+ return dirents, nil
+ }
+ for _, p9d := range p9ds {
+ if p9d.Name == "." || p9d.Name == ".." {
+ continue
+ }
+ dirent := vfs.Dirent{
+ Name: p9d.Name,
+ Ino: p9d.QID.Path,
+ NextOff: int64(len(dirents) + 1),
+ }
+ // p9 does not expose 9P2000.U's DMDEVICE, DMNAMEDPIPE, or
+ // DMSOCKET.
+ switch p9d.Type {
+ case p9.TypeSymlink:
+ dirent.Type = linux.DT_LNK
+ case p9.TypeDir:
+ dirent.Type = linux.DT_DIR
+ default:
+ dirent.Type = linux.DT_REG
+ }
+ dirents = append(dirents, dirent)
+ }
+ off = p9ds[len(p9ds)-1].Offset
+ }
+}
+
+// Seek implements vfs.FileDescriptionImpl.Seek.
+func (fd *directoryFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
+ fd.mu.Lock()
+ defer fd.mu.Unlock()
+
+ switch whence {
+ case linux.SEEK_SET:
+ if offset < 0 {
+ return 0, syserror.EINVAL
+ }
+ if offset == 0 {
+ // Ensure that the next call to fd.IterDirents() calls
+ // fd.dentry().getDirents().
+ fd.dirents = nil
+ }
+ fd.off = offset
+ return fd.off, nil
+ case linux.SEEK_CUR:
+ offset += fd.off
+ if offset < 0 {
+ return 0, syserror.EINVAL
+ }
+ // Don't clear fd.dirents in this case, even if offset == 0.
+ fd.off = offset
+ return fd.off, nil
+ default:
+ return 0, syserror.EINVAL
+ }
+}
diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go
new file mode 100644
index 000000000..8eb61debf
--- /dev/null
+++ b/pkg/sentry/fsimpl/gofer/filesystem.go
@@ -0,0 +1,1087 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package gofer
+
+import (
+ "sync"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/fspath"
+ "gvisor.dev/gvisor/pkg/p9"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/syserror"
+)
+
+// Sync implements vfs.FilesystemImpl.Sync.
+func (fs *filesystem) Sync(ctx context.Context) error {
+ // Snapshot current dentries and special files.
+ fs.syncMu.Lock()
+ ds := make([]*dentry, 0, len(fs.dentries))
+ for d := range fs.dentries {
+ ds = append(ds, d)
+ }
+ sffds := make([]*specialFileFD, 0, len(fs.specialFileFDs))
+ for sffd := range fs.specialFileFDs {
+ sffds = append(sffds, sffd)
+ }
+ fs.syncMu.Unlock()
+
+ // Return the first error we encounter, but sync everything we can
+ // regardless.
+ var retErr error
+
+ // Sync regular files.
+ for _, d := range ds {
+ if !d.TryIncRef() {
+ continue
+ }
+ err := d.syncSharedHandle(ctx)
+ d.DecRef()
+ if err != nil && retErr == nil {
+ retErr = err
+ }
+ }
+
+ // Sync special files, which may be writable but do not use dentry shared
+ // handles (so they won't be synced by the above).
+ for _, sffd := range sffds {
+ if !sffd.vfsfd.TryIncRef() {
+ continue
+ }
+ err := sffd.Sync(ctx)
+ sffd.vfsfd.DecRef()
+ if err != nil && retErr == nil {
+ retErr = err
+ }
+ }
+
+ return retErr
+}
+
+// maxFilenameLen is the maximum length of a filename. This is dictated by 9P's
+// encoding of strings, which uses 2 bytes for the length prefix.
+const maxFilenameLen = (1 << 16) - 1
+
+// dentrySlicePool is a pool of *[]*dentry used to store dentries for which
+// dentry.checkCachingLocked() must be called. The pool holds pointers to
+// slices because Go lacks generics, so sync.Pool operates on interface{}, so
+// every call to (what should be) sync.Pool<[]*dentry>.Put() allocates a copy
+// of the slice header on the heap.
+var dentrySlicePool = sync.Pool{
+ New: func() interface{} {
+ ds := make([]*dentry, 0, 4) // arbitrary non-zero initial capacity
+ return &ds
+ },
+}
+
+func appendDentry(ds *[]*dentry, d *dentry) *[]*dentry {
+ if ds == nil {
+ ds = dentrySlicePool.Get().(*[]*dentry)
+ }
+ *ds = append(*ds, d)
+ return ds
+}
+
+// Preconditions: ds != nil.
+func putDentrySlice(ds *[]*dentry) {
+ // Allow dentries to be GC'd.
+ for i := range *ds {
+ (*ds)[i] = nil
+ }
+ *ds = (*ds)[:0]
+ dentrySlicePool.Put(ds)
+}
+
+// stepLocked resolves rp.Component() to an existing file, starting from the
+// given directory.
+//
+// Dentries which may become cached as a result of the traversal are appended
+// to *ds.
+//
+// Preconditions: fs.renameMu must be locked. d.dirMu must be locked.
+// !rp.Done(). If fs.opts.interop == InteropModeShared, then d's cached
+// metadata must be up to date.
+func (fs *filesystem) stepLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry, ds **[]*dentry) (*dentry, error) {
+ if !d.isDir() {
+ return nil, syserror.ENOTDIR
+ }
+ if err := d.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil {
+ return nil, err
+ }
+afterSymlink:
+ name := rp.Component()
+ if name == "." {
+ rp.Advance()
+ return d, nil
+ }
+ if name == ".." {
+ parentVFSD, err := rp.ResolveParent(&d.vfsd)
+ if err != nil {
+ return nil, err
+ }
+ parent := parentVFSD.Impl().(*dentry)
+ if fs.opts.interop == InteropModeShared {
+ // We must assume that parentVFSD is correct, because if d has been
+ // moved elsewhere in the remote filesystem so that its parent has
+ // changed, we have no way of determining its new parent's location
+ // in the filesystem. Get updated metadata for parentVFSD.
+ _, attrMask, attr, err := parent.file.getAttr(ctx, dentryAttrMask())
+ if err != nil {
+ return nil, err
+ }
+ parent.updateFromP9Attrs(attrMask, &attr)
+ }
+ rp.Advance()
+ return parent, nil
+ }
+ childVFSD, err := rp.ResolveChild(&d.vfsd, name)
+ if err != nil {
+ return nil, err
+ }
+ // FIXME(jamieliu): Linux performs revalidation before mount lookup
+ // (fs/namei.c:lookup_fast() => __d_lookup_rcu(), d_revalidate(),
+ // __follow_mount_rcu()).
+ child, err := fs.revalidateChildLocked(ctx, rp.VirtualFilesystem(), d, name, childVFSD, ds)
+ if err != nil {
+ return nil, err
+ }
+ if child == nil {
+ return nil, syserror.ENOENT
+ }
+ if child.isSymlink() && rp.ShouldFollowSymlink() {
+ target, err := child.readlink(ctx, rp.Mount())
+ if err != nil {
+ return nil, err
+ }
+ if err := rp.HandleSymlink(target); err != nil {
+ return nil, err
+ }
+ goto afterSymlink // don't check the current directory again
+ }
+ rp.Advance()
+ return child, nil
+}
+
+// revalidateChildLocked must be called after a call to parent.vfsd.Child(name)
+// or vfs.ResolvingPath.ResolveChild(name) returns childVFSD (which may be
+// nil) to verify that the returned child (or lack thereof) is correct. If no file
+// exists at name, revalidateChildLocked returns (nil, nil).
+//
+// Preconditions: fs.renameMu must be locked. parent.dirMu must be locked.
+// parent.isDir(). name is not "." or "..".
+//
+// Postconditions: If revalidateChildLocked returns a non-nil dentry, its
+// cached metadata is up to date.
+func (fs *filesystem) revalidateChildLocked(ctx context.Context, vfsObj *vfs.VirtualFilesystem, parent *dentry, name string, childVFSD *vfs.Dentry, ds **[]*dentry) (*dentry, error) {
+ if childVFSD != nil && fs.opts.interop != InteropModeShared {
+ // We have a cached dentry that is assumed to be correct.
+ return childVFSD.Impl().(*dentry), nil
+ }
+ // We either don't have a cached dentry or need to verify that it's still
+ // correct, either of which requires a remote lookup. Check if this name is
+ // valid before performing the lookup.
+ if len(name) > maxFilenameLen {
+ return nil, syserror.ENAMETOOLONG
+ }
+ // Check if we've already cached this lookup with a negative result.
+ if _, ok := parent.negativeChildren[name]; ok {
+ return nil, nil
+ }
+ // Perform the remote lookup.
+ qid, file, attrMask, attr, err := parent.file.walkGetAttrOne(ctx, name)
+ if err != nil && err != syserror.ENOENT {
+ return nil, err
+ }
+ if childVFSD != nil {
+ child := childVFSD.Impl().(*dentry)
+ if !file.isNil() && qid.Path == child.ino {
+ // The file at this path hasn't changed. Just update cached
+ // metadata.
+ file.close(ctx)
+ child.updateFromP9Attrs(attrMask, &attr)
+ return child, nil
+ }
+ // The file at this path has changed or no longer exists. Remove
+ // the stale dentry from the tree, and re-evaluate its caching
+ // status (i.e. if it has 0 references, drop it).
+ vfsObj.ForceDeleteDentry(childVFSD)
+ *ds = appendDentry(*ds, child)
+ childVFSD = nil
+ }
+ if file.isNil() {
+ // No file exists at this path now. Cache the negative lookup if
+ // allowed.
+ if fs.opts.interop != InteropModeShared {
+ parent.cacheNegativeChildLocked(name)
+ }
+ return nil, nil
+ }
+ // Create a new dentry representing the file.
+ child, err := fs.newDentry(ctx, file, qid, attrMask, &attr)
+ if err != nil {
+ file.close(ctx)
+ return nil, err
+ }
+ parent.IncRef() // reference held by child on its parent
+ parent.vfsd.InsertChild(&child.vfsd, name)
+ // For now, child has 0 references, so our caller should call
+ // child.checkCachingLocked().
+ *ds = appendDentry(*ds, child)
+ return child, nil
+}
+
+// walkParentDirLocked resolves all but the last path component of rp to an
+// existing directory, starting from the given directory (which is usually
+// rp.Start().Impl().(*dentry)). It does not check that the returned directory
+// is searchable by the provider of rp.
+//
+// Preconditions: fs.renameMu must be locked. !rp.Done(). If fs.opts.interop ==
+// InteropModeShared, then d's cached metadata must be up to date.
+func (fs *filesystem) walkParentDirLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry, ds **[]*dentry) (*dentry, error) {
+ for !rp.Final() {
+ d.dirMu.Lock()
+ next, err := fs.stepLocked(ctx, rp, d, ds)
+ d.dirMu.Unlock()
+ if err != nil {
+ return nil, err
+ }
+ d = next
+ }
+ if !d.isDir() {
+ return nil, syserror.ENOTDIR
+ }
+ return d, nil
+}
+
+// resolveLocked resolves rp to an existing file.
+//
+// Preconditions: fs.renameMu must be locked.
+func (fs *filesystem) resolveLocked(ctx context.Context, rp *vfs.ResolvingPath, ds **[]*dentry) (*dentry, error) {
+ d := rp.Start().Impl().(*dentry)
+ if fs.opts.interop == InteropModeShared {
+ // Get updated metadata for rp.Start() as required by fs.stepLocked().
+ if err := d.updateFromGetattr(ctx); err != nil {
+ return nil, err
+ }
+ }
+ for !rp.Done() {
+ d.dirMu.Lock()
+ next, err := fs.stepLocked(ctx, rp, d, ds)
+ d.dirMu.Unlock()
+ if err != nil {
+ return nil, err
+ }
+ d = next
+ }
+ if rp.MustBeDir() && !d.isDir() {
+ return nil, syserror.ENOTDIR
+ }
+ return d, nil
+}
+
+// doCreateAt checks that creating a file at rp is permitted, then invokes
+// create to do so.
+//
+// Preconditions: !rp.Done(). For the final path component in rp,
+// !rp.ShouldFollowSymlink().
+func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir bool, create func(parent *dentry, name string) error) error {
+ var ds *[]*dentry
+ fs.renameMu.RLock()
+ defer fs.renameMuRUnlockAndCheckCaching(&ds)
+ start := rp.Start().Impl().(*dentry)
+ if fs.opts.interop == InteropModeShared {
+ // Get updated metadata for start as required by
+ // fs.walkParentDirLocked().
+ if err := start.updateFromGetattr(ctx); err != nil {
+ return err
+ }
+ }
+ parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
+ if err != nil {
+ return err
+ }
+ if err := parent.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true); err != nil {
+ return err
+ }
+ if parent.isDeleted() {
+ return syserror.ENOENT
+ }
+ name := rp.Component()
+ if name == "." || name == ".." {
+ return syserror.EEXIST
+ }
+ if len(name) > maxFilenameLen {
+ return syserror.ENAMETOOLONG
+ }
+ if !dir && rp.MustBeDir() {
+ return syserror.ENOENT
+ }
+ mnt := rp.Mount()
+ if err := mnt.CheckBeginWrite(); err != nil {
+ return err
+ }
+ defer mnt.EndWrite()
+ parent.dirMu.Lock()
+ defer parent.dirMu.Unlock()
+ if fs.opts.interop == InteropModeShared {
+ // The existence of a dentry at name would be inconclusive because the
+ // file it represents may have been deleted from the remote filesystem,
+ // so we would need to make an RPC to revalidate the dentry. Just
+ // attempt the file creation RPC instead. If a file does exist, the RPC
+ // will fail with EEXIST like we would have. If the RPC succeeds, and a
+ // stale dentry exists, the dentry will fail revalidation next time
+ // it's used.
+ return create(parent, name)
+ }
+ if parent.vfsd.Child(name) != nil {
+ return syserror.EEXIST
+ }
+ // No cached dentry exists; however, there might still be an existing file
+ // at name. As above, we attempt the file creation RPC anyway.
+ if err := create(parent, name); err != nil {
+ return err
+ }
+ parent.touchCMtime(ctx)
+ delete(parent.negativeChildren, name)
+ parent.dirents = nil
+ return nil
+}
+
+// Preconditions: !rp.Done().
+func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir bool) error {
+ var ds *[]*dentry
+ fs.renameMu.RLock()
+ defer fs.renameMuRUnlockAndCheckCaching(&ds)
+ start := rp.Start().Impl().(*dentry)
+ if fs.opts.interop == InteropModeShared {
+ // Get updated metadata for start as required by
+ // fs.walkParentDirLocked().
+ if err := start.updateFromGetattr(ctx); err != nil {
+ return err
+ }
+ }
+ parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
+ if err != nil {
+ return err
+ }
+ if err := parent.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true); err != nil {
+ return err
+ }
+ if err := rp.Mount().CheckBeginWrite(); err != nil {
+ return err
+ }
+ defer rp.Mount().EndWrite()
+
+ name := rp.Component()
+ if dir {
+ if name == "." {
+ return syserror.EINVAL
+ }
+ if name == ".." {
+ return syserror.ENOTEMPTY
+ }
+ } else {
+ if name == "." || name == ".." {
+ return syserror.EISDIR
+ }
+ }
+ vfsObj := rp.VirtualFilesystem()
+ mntns := vfs.MountNamespaceFromContext(ctx)
+ parent.dirMu.Lock()
+ defer parent.dirMu.Unlock()
+ childVFSD := parent.vfsd.Child(name)
+ var child *dentry
+ // We only need a dentry representing the file at name if it can be a mount
+ // point. If childVFSD is nil, then it can't be a mount point. If childVFSD
+ // is non-nil but stale, the actual file can't be a mount point either; we
+ // detect this case by just speculatively calling PrepareDeleteDentry and
+ // only revalidating the dentry if that fails (indicating that the existing
+ // dentry is a mount point).
+ if childVFSD != nil {
+ child = childVFSD.Impl().(*dentry)
+ if err := vfsObj.PrepareDeleteDentry(mntns, childVFSD); err != nil {
+ child, err = fs.revalidateChildLocked(ctx, vfsObj, parent, name, childVFSD, &ds)
+ if err != nil {
+ return err
+ }
+ if child != nil {
+ childVFSD = &child.vfsd
+ if err := vfsObj.PrepareDeleteDentry(mntns, childVFSD); err != nil {
+ return err
+ }
+ } else {
+ childVFSD = nil
+ }
+ }
+ } else if _, ok := parent.negativeChildren[name]; ok {
+ return syserror.ENOENT
+ }
+ flags := uint32(0)
+ if dir {
+ if child != nil && !child.isDir() {
+ return syserror.ENOTDIR
+ }
+ flags = linux.AT_REMOVEDIR
+ } else {
+ if child != nil && child.isDir() {
+ return syserror.EISDIR
+ }
+ if rp.MustBeDir() {
+ return syserror.ENOTDIR
+ }
+ }
+ err = parent.file.unlinkAt(ctx, name, flags)
+ if err != nil {
+ if childVFSD != nil {
+ vfsObj.AbortDeleteDentry(childVFSD)
+ }
+ return err
+ }
+ if fs.opts.interop != InteropModeShared {
+ parent.touchCMtime(ctx)
+ parent.cacheNegativeChildLocked(name)
+ parent.dirents = nil
+ }
+ if child != nil {
+ child.setDeleted()
+ vfsObj.CommitDeleteDentry(childVFSD)
+ ds = appendDentry(ds, child)
+ }
+ return nil
+}
+
+// renameMuRUnlockAndCheckCaching calls fs.renameMu.RUnlock(), then calls
+// dentry.checkCachingLocked on all dentries in *ds with fs.renameMu locked for
+// writing.
+//
+// ds is a pointer-to-pointer since defer evaluates its arguments immediately,
+// but dentry slices are allocated lazily, and it's much easier to say "defer
+// fs.renameMuRUnlockAndCheckCaching(&ds)" than "defer func() {
+// fs.renameMuRUnlockAndCheckCaching(ds) }()" to work around this.
+func (fs *filesystem) renameMuRUnlockAndCheckCaching(ds **[]*dentry) {
+ fs.renameMu.RUnlock()
+ if *ds == nil {
+ return
+ }
+ if len(**ds) != 0 {
+ fs.renameMu.Lock()
+ for _, d := range **ds {
+ d.checkCachingLocked()
+ }
+ fs.renameMu.Unlock()
+ }
+ putDentrySlice(*ds)
+}
+
+func (fs *filesystem) renameMuUnlockAndCheckCaching(ds **[]*dentry) {
+ if *ds == nil {
+ fs.renameMu.Unlock()
+ return
+ }
+ for _, d := range **ds {
+ d.checkCachingLocked()
+ }
+ fs.renameMu.Unlock()
+ putDentrySlice(*ds)
+}
+
+// GetDentryAt implements vfs.FilesystemImpl.GetDentryAt.
+func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) {
+ var ds *[]*dentry
+ fs.renameMu.RLock()
+ defer fs.renameMuRUnlockAndCheckCaching(&ds)
+ d, err := fs.resolveLocked(ctx, rp, &ds)
+ if err != nil {
+ return nil, err
+ }
+ if opts.CheckSearchable {
+ if !d.isDir() {
+ return nil, syserror.ENOTDIR
+ }
+ if err := d.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil {
+ return nil, err
+ }
+ }
+ d.IncRef()
+ return &d.vfsd, nil
+}
+
+// GetParentDentryAt implements vfs.FilesystemImpl.GetParentDentryAt.
+func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) {
+ var ds *[]*dentry
+ fs.renameMu.RLock()
+ defer fs.renameMuRUnlockAndCheckCaching(&ds)
+ start := rp.Start().Impl().(*dentry)
+ if fs.opts.interop == InteropModeShared {
+ // Get updated metadata for start as required by
+ // fs.walkParentDirLocked().
+ if err := start.updateFromGetattr(ctx); err != nil {
+ return nil, err
+ }
+ }
+ d, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
+ if err != nil {
+ return nil, err
+ }
+ d.IncRef()
+ return &d.vfsd, nil
+}
+
+// LinkAt implements vfs.FilesystemImpl.LinkAt.
+func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error {
+ return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, childName string) error {
+ if rp.Mount() != vd.Mount() {
+ return syserror.EXDEV
+ }
+ // 9P2000.L supports hard links, but we don't.
+ return syserror.EPERM
+ })
+}
+
+// MkdirAt implements vfs.FilesystemImpl.MkdirAt.
+func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error {
+ return fs.doCreateAt(ctx, rp, true /* dir */, func(parent *dentry, name string) error {
+ creds := rp.Credentials()
+ _, err := parent.file.mkdir(ctx, name, (p9.FileMode)(opts.Mode), (p9.UID)(creds.EffectiveKUID), (p9.GID)(creds.EffectiveKGID))
+ return err
+ })
+}
+
+// MknodAt implements vfs.FilesystemImpl.MknodAt.
+func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error {
+ return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, name string) error {
+ creds := rp.Credentials()
+ _, err := parent.file.mknod(ctx, name, (p9.FileMode)(opts.Mode), opts.DevMajor, opts.DevMinor, (p9.UID)(creds.EffectiveKUID), (p9.GID)(creds.EffectiveKGID))
+ return err
+ })
+}
+
+// OpenAt implements vfs.FilesystemImpl.OpenAt.
+func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
+ // Reject O_TMPFILE, which is not supported; supporting it correctly in the
+ // presence of other remote filesystem users requires remote filesystem
+ // support, and it isn't clear that there's any way to implement this in
+ // 9P.
+ if opts.Flags&linux.O_TMPFILE != 0 {
+ return nil, syserror.EOPNOTSUPP
+ }
+ mayCreate := opts.Flags&linux.O_CREAT != 0
+ mustCreate := opts.Flags&(linux.O_CREAT|linux.O_EXCL) == (linux.O_CREAT | linux.O_EXCL)
+
+ var ds *[]*dentry
+ fs.renameMu.RLock()
+ defer fs.renameMuRUnlockAndCheckCaching(&ds)
+
+ start := rp.Start().Impl().(*dentry)
+ if fs.opts.interop == InteropModeShared {
+ // Get updated metadata for start as required by fs.stepLocked().
+ if err := start.updateFromGetattr(ctx); err != nil {
+ return nil, err
+ }
+ }
+ if rp.Done() {
+ return start.openLocked(ctx, rp, opts.Flags)
+ }
+
+afterTrailingSymlink:
+ parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
+ if err != nil {
+ return nil, err
+ }
+ // Check for search permission in the parent directory.
+ if err := parent.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil {
+ return nil, err
+ }
+ // Determine whether or not we need to create a file.
+ parent.dirMu.Lock()
+ child, err := fs.stepLocked(ctx, rp, parent, &ds)
+ if err == syserror.ENOENT && mayCreate {
+ fd, err := parent.createAndOpenChildLocked(ctx, rp, &opts)
+ parent.dirMu.Unlock()
+ return fd, err
+ }
+ if err != nil {
+ parent.dirMu.Unlock()
+ return nil, err
+ }
+ // Open existing child or follow symlink.
+ parent.dirMu.Unlock()
+ if mustCreate {
+ return nil, syserror.EEXIST
+ }
+ if child.isSymlink() && rp.ShouldFollowSymlink() {
+ target, err := child.readlink(ctx, rp.Mount())
+ if err != nil {
+ return nil, err
+ }
+ if err := rp.HandleSymlink(target); err != nil {
+ return nil, err
+ }
+ start = parent
+ goto afterTrailingSymlink
+ }
+ return child.openLocked(ctx, rp, opts.Flags)
+}
+
+// Preconditions: fs.renameMu must be locked.
+func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, flags uint32) (*vfs.FileDescription, error) {
+ ats := vfs.AccessTypesForOpenFlags(flags)
+ if err := d.checkPermissions(rp.Credentials(), ats, d.isDir()); err != nil {
+ return nil, err
+ }
+ mnt := rp.Mount()
+ filetype := d.fileType()
+ switch {
+ case filetype == linux.S_IFREG && !d.fs.opts.regularFilesUseSpecialFileFD:
+ if err := d.ensureSharedHandle(ctx, ats&vfs.MayRead != 0, ats&vfs.MayWrite != 0, flags&linux.O_TRUNC != 0); err != nil {
+ return nil, err
+ }
+ fd := &regularFileFD{}
+ if err := fd.vfsfd.Init(fd, flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{
+ AllowDirectIO: true,
+ }); err != nil {
+ return nil, err
+ }
+ return &fd.vfsfd, nil
+ case filetype == linux.S_IFDIR:
+ // Can't open directories with O_CREAT.
+ if flags&linux.O_CREAT != 0 {
+ return nil, syserror.EISDIR
+ }
+ // Can't open directories writably.
+ if ats&vfs.MayWrite != 0 {
+ return nil, syserror.EISDIR
+ }
+ if flags&linux.O_DIRECT != 0 {
+ return nil, syserror.EINVAL
+ }
+ if err := d.ensureSharedHandle(ctx, ats&vfs.MayRead != 0, false /* write */, false /* trunc */); err != nil {
+ return nil, err
+ }
+ fd := &directoryFD{}
+ if err := fd.vfsfd.Init(fd, flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil {
+ return nil, err
+ }
+ return &fd.vfsfd, nil
+ case filetype == linux.S_IFLNK:
+ // Can't open symlinks without O_PATH (which is unimplemented).
+ return nil, syserror.ELOOP
+ default:
+ if flags&linux.O_DIRECT != 0 {
+ return nil, syserror.EINVAL
+ }
+ h, err := openHandle(ctx, d.file, ats&vfs.MayRead != 0, ats&vfs.MayWrite != 0, flags&linux.O_TRUNC != 0)
+ if err != nil {
+ return nil, err
+ }
+ fd := &specialFileFD{
+ handle: h,
+ }
+ if err := fd.vfsfd.Init(fd, flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil {
+ h.close(ctx)
+ return nil, err
+ }
+ return &fd.vfsfd, nil
+ }
+}
+
+// Preconditions: d.fs.renameMu must be locked. d.dirMu must be locked.
+func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.OpenOptions) (*vfs.FileDescription, error) {
+ if err := d.checkPermissions(rp.Credentials(), vfs.MayWrite, true); err != nil {
+ return nil, err
+ }
+ if d.isDeleted() {
+ return nil, syserror.ENOENT
+ }
+ mnt := rp.Mount()
+ if err := mnt.CheckBeginWrite(); err != nil {
+ return nil, err
+ }
+ defer mnt.EndWrite()
+
+ // 9P2000.L's lcreate takes a fid representing the parent directory, and
+ // converts it into an open fid representing the created file, so we need
+ // to duplicate the directory fid first.
+ _, dirfile, err := d.file.walk(ctx, nil)
+ if err != nil {
+ return nil, err
+ }
+ creds := rp.Credentials()
+ name := rp.Component()
+ fdobj, openFile, createQID, _, err := dirfile.create(ctx, name, (p9.OpenFlags)(opts.Flags), (p9.FileMode)(opts.Mode), (p9.UID)(creds.EffectiveKUID), (p9.GID)(creds.EffectiveKGID))
+ if err != nil {
+ dirfile.close(ctx)
+ return nil, err
+ }
+ // Then we need to walk to the file we just created to get a non-open fid
+ // representing it, and to get its metadata. This must use d.file since, as
+ // explained above, dirfile was invalidated by dirfile.Create().
+ walkQID, nonOpenFile, attrMask, attr, err := d.file.walkGetAttrOne(ctx, name)
+ if err != nil {
+ openFile.close(ctx)
+ if fdobj != nil {
+ fdobj.Close()
+ }
+ return nil, err
+ }
+ // Sanity-check that we walked to the file we created.
+ if createQID.Path != walkQID.Path {
+ // Probably due to concurrent remote filesystem mutation?
+ ctx.Warningf("gofer.dentry.createAndOpenChildLocked: created file has QID %v before walk, QID %v after (interop=%v)", createQID, walkQID, d.fs.opts.interop)
+ nonOpenFile.close(ctx)
+ openFile.close(ctx)
+ if fdobj != nil {
+ fdobj.Close()
+ }
+ return nil, syserror.EAGAIN
+ }
+
+ // Construct the new dentry.
+ child, err := d.fs.newDentry(ctx, nonOpenFile, createQID, attrMask, &attr)
+ if err != nil {
+ nonOpenFile.close(ctx)
+ openFile.close(ctx)
+ if fdobj != nil {
+ fdobj.Close()
+ }
+ return nil, err
+ }
+ // Incorporate the fid that was opened by lcreate.
+ useRegularFileFD := child.fileType() == linux.S_IFREG && !d.fs.opts.regularFilesUseSpecialFileFD
+ if useRegularFileFD {
+ child.handleMu.Lock()
+ child.handle.file = openFile
+ if fdobj != nil {
+ child.handle.fd = int32(fdobj.Release())
+ }
+ child.handleReadable = vfs.MayReadFileWithOpenFlags(opts.Flags)
+ child.handleWritable = vfs.MayWriteFileWithOpenFlags(opts.Flags)
+ child.handleMu.Unlock()
+ }
+ // Take a reference on the new dentry to be held by the new file
+ // description. (This reference also means that the new dentry is not
+ // eligible for caching yet, so we don't need to append to a dentry slice.)
+ child.refs = 1
+ // Insert the dentry into the tree.
+ d.IncRef() // reference held by child on its parent d
+ d.vfsd.InsertChild(&child.vfsd, name)
+ if d.fs.opts.interop != InteropModeShared {
+ d.touchCMtime(ctx)
+ delete(d.negativeChildren, name)
+ d.dirents = nil
+ }
+
+ // Finally, construct a file description representing the created file.
+ var childVFSFD *vfs.FileDescription
+ mnt.IncRef()
+ if useRegularFileFD {
+ fd := &regularFileFD{}
+ if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &child.vfsd, &vfs.FileDescriptionOptions{
+ AllowDirectIO: true,
+ }); err != nil {
+ return nil, err
+ }
+ childVFSFD = &fd.vfsfd
+ } else {
+ fd := &specialFileFD{
+ handle: handle{
+ file: openFile,
+ fd: -1,
+ },
+ }
+ if fdobj != nil {
+ fd.handle.fd = int32(fdobj.Release())
+ }
+ if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &child.vfsd, &vfs.FileDescriptionOptions{}); err != nil {
+ fd.handle.close(ctx)
+ return nil, err
+ }
+ childVFSFD = &fd.vfsfd
+ }
+ return childVFSFD, nil
+}
+
+// ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt.
+func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) {
+ var ds *[]*dentry
+ fs.renameMu.RLock()
+ defer fs.renameMuRUnlockAndCheckCaching(&ds)
+ d, err := fs.resolveLocked(ctx, rp, &ds)
+ if err != nil {
+ return "", err
+ }
+ if !d.isSymlink() {
+ return "", syserror.EINVAL
+ }
+ return d.readlink(ctx, rp.Mount())
+}
+
+// RenameAt implements vfs.FilesystemImpl.RenameAt.
+func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error {
+ if opts.Flags != 0 {
+ // Requires 9P support.
+ return syserror.EINVAL
+ }
+
+ var ds *[]*dentry
+ fs.renameMu.Lock()
+ defer fs.renameMuUnlockAndCheckCaching(&ds)
+ newParent, err := fs.walkParentDirLocked(ctx, rp, rp.Start().Impl().(*dentry), &ds)
+ if err != nil {
+ return err
+ }
+ newName := rp.Component()
+ if newName == "." || newName == ".." {
+ return syserror.EBUSY
+ }
+ mnt := rp.Mount()
+ if mnt != oldParentVD.Mount() {
+ return syserror.EXDEV
+ }
+ if err := mnt.CheckBeginWrite(); err != nil {
+ return err
+ }
+ defer mnt.EndWrite()
+
+ oldParent := oldParentVD.Dentry().Impl().(*dentry)
+ if fs.opts.interop == InteropModeShared {
+ if err := oldParent.updateFromGetattr(ctx); err != nil {
+ return err
+ }
+ }
+ if err := oldParent.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true); err != nil {
+ return err
+ }
+ vfsObj := rp.VirtualFilesystem()
+ // We need a dentry representing the renamed file since, if it's a
+ // directory, we need to check for write permission on it.
+ oldParent.dirMu.Lock()
+ defer oldParent.dirMu.Unlock()
+ renamed, err := fs.revalidateChildLocked(ctx, vfsObj, oldParent, oldName, oldParent.vfsd.Child(oldName), &ds)
+ if err != nil {
+ return err
+ }
+ if renamed == nil {
+ return syserror.ENOENT
+ }
+ if renamed.isDir() {
+ if renamed == newParent || renamed.vfsd.IsAncestorOf(&newParent.vfsd) {
+ return syserror.EINVAL
+ }
+ if oldParent != newParent {
+ if err := renamed.checkPermissions(rp.Credentials(), vfs.MayWrite, true); err != nil {
+ return err
+ }
+ }
+ } else {
+ if opts.MustBeDir || rp.MustBeDir() {
+ return syserror.ENOTDIR
+ }
+ }
+
+ if oldParent != newParent {
+ if err := newParent.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true); err != nil {
+ return err
+ }
+ newParent.dirMu.Lock()
+ defer newParent.dirMu.Unlock()
+ }
+ if newParent.isDeleted() {
+ return syserror.ENOENT
+ }
+ replacedVFSD := newParent.vfsd.Child(newName)
+ var replaced *dentry
+ // This is similar to unlinkAt, except:
+ //
+ // - We revalidate the replaced dentry unconditionally for simplicity.
+ //
+ // - If rp.MustBeDir(), then we need a dentry representing the replaced
+ // file regardless to confirm that it's a directory.
+ if replacedVFSD != nil || rp.MustBeDir() {
+ replaced, err = fs.revalidateChildLocked(ctx, vfsObj, newParent, newName, replacedVFSD, &ds)
+ if err != nil {
+ return err
+ }
+ if replaced != nil {
+ if replaced.isDir() {
+ if !renamed.isDir() {
+ return syserror.EISDIR
+ }
+ } else {
+ if rp.MustBeDir() || renamed.isDir() {
+ return syserror.ENOTDIR
+ }
+ }
+ replacedVFSD = &replaced.vfsd
+ } else {
+ replacedVFSD = nil
+ }
+ }
+
+ if oldParent == newParent && oldName == newName {
+ return nil
+ }
+ if err := vfsObj.PrepareRenameDentry(vfs.MountNamespaceFromContext(ctx), &renamed.vfsd, replacedVFSD); err != nil {
+ return err
+ }
+ if err := renamed.file.rename(ctx, newParent.file, newName); err != nil {
+ vfsObj.AbortRenameDentry(&renamed.vfsd, replacedVFSD)
+ return err
+ }
+ if fs.opts.interop != InteropModeShared {
+ oldParent.cacheNegativeChildLocked(oldName)
+ oldParent.dirents = nil
+ delete(newParent.negativeChildren, newName)
+ newParent.dirents = nil
+ }
+ vfsObj.CommitRenameReplaceDentry(&renamed.vfsd, &newParent.vfsd, newName, replacedVFSD)
+ return nil
+}
+
+// RmdirAt implements vfs.FilesystemImpl.RmdirAt.
+func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error {
+ return fs.unlinkAt(ctx, rp, true /* dir */)
+}
+
+// SetStatAt implements vfs.FilesystemImpl.SetStatAt.
+func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error {
+ var ds *[]*dentry
+ fs.renameMu.RLock()
+ defer fs.renameMuRUnlockAndCheckCaching(&ds)
+ d, err := fs.resolveLocked(ctx, rp, &ds)
+ if err != nil {
+ return err
+ }
+ return d.setStat(ctx, rp.Credentials(), &opts.Stat, rp.Mount())
+}
+
+// StatAt implements vfs.FilesystemImpl.StatAt.
+func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) {
+ var ds *[]*dentry
+ fs.renameMu.RLock()
+ defer fs.renameMuRUnlockAndCheckCaching(&ds)
+ d, err := fs.resolveLocked(ctx, rp, &ds)
+ if err != nil {
+ return linux.Statx{}, err
+ }
+ // Since walking updates metadata for all traversed dentries under
+ // InteropModeShared, including the returned one, we can return cached
+ // metadata here regardless of fs.opts.interop.
+ var stat linux.Statx
+ d.statTo(&stat)
+ return stat, nil
+}
+
+// StatFSAt implements vfs.FilesystemImpl.StatFSAt.
+func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) {
+ var ds *[]*dentry
+ fs.renameMu.RLock()
+ defer fs.renameMuRUnlockAndCheckCaching(&ds)
+ d, err := fs.resolveLocked(ctx, rp, &ds)
+ if err != nil {
+ return linux.Statfs{}, err
+ }
+ fsstat, err := d.file.statFS(ctx)
+ if err != nil {
+ return linux.Statfs{}, err
+ }
+ nameLen := uint64(fsstat.NameLength)
+ if nameLen > maxFilenameLen {
+ nameLen = maxFilenameLen
+ }
+ return linux.Statfs{
+ // This is primarily for distinguishing a gofer file system in
+ // tests. Testing is important, so instead of defining
+ // something completely random, use a standard value.
+ Type: linux.V9FS_MAGIC,
+ BlockSize: int64(fsstat.BlockSize),
+ Blocks: fsstat.Blocks,
+ BlocksFree: fsstat.BlocksFree,
+ BlocksAvailable: fsstat.BlocksAvailable,
+ Files: fsstat.Files,
+ FilesFree: fsstat.FilesFree,
+ NameLength: nameLen,
+ }, nil
+}
+
+// SymlinkAt implements vfs.FilesystemImpl.SymlinkAt.
+func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error {
+ return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, name string) error {
+ creds := rp.Credentials()
+ _, err := parent.file.symlink(ctx, target, name, (p9.UID)(creds.EffectiveKUID), (p9.GID)(creds.EffectiveKGID))
+ return err
+ })
+}
+
+// UnlinkAt implements vfs.FilesystemImpl.UnlinkAt.
+func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error {
+ return fs.unlinkAt(ctx, rp, false /* dir */)
+}
+
+// ListxattrAt implements vfs.FilesystemImpl.ListxattrAt.
+func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath) ([]string, error) {
+ var ds *[]*dentry
+ fs.renameMu.RLock()
+ defer fs.renameMuRUnlockAndCheckCaching(&ds)
+ d, err := fs.resolveLocked(ctx, rp, &ds)
+ if err != nil {
+ return nil, err
+ }
+ return d.listxattr(ctx)
+}
+
+// GetxattrAt implements vfs.FilesystemImpl.GetxattrAt.
+func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) (string, error) {
+ var ds *[]*dentry
+ fs.renameMu.RLock()
+ defer fs.renameMuRUnlockAndCheckCaching(&ds)
+ d, err := fs.resolveLocked(ctx, rp, &ds)
+ if err != nil {
+ return "", err
+ }
+ return d.getxattr(ctx, name)
+}
+
+// SetxattrAt implements vfs.FilesystemImpl.SetxattrAt.
+func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetxattrOptions) error {
+ var ds *[]*dentry
+ fs.renameMu.RLock()
+ defer fs.renameMuRUnlockAndCheckCaching(&ds)
+ d, err := fs.resolveLocked(ctx, rp, &ds)
+ if err != nil {
+ return err
+ }
+ return d.setxattr(ctx, &opts)
+}
+
+// RemovexattrAt implements vfs.FilesystemImpl.RemovexattrAt.
+func (fs *filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error {
+ var ds *[]*dentry
+ fs.renameMu.RLock()
+ defer fs.renameMuRUnlockAndCheckCaching(&ds)
+ d, err := fs.resolveLocked(ctx, rp, &ds)
+ if err != nil {
+ return err
+ }
+ return d.removexattr(ctx, name)
+}
+
+// PrependPath implements vfs.FilesystemImpl.PrependPath.
+func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error {
+ fs.renameMu.RLock()
+ defer fs.renameMu.RUnlock()
+ return vfs.GenericPrependPath(vfsroot, vd, b)
+}
diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go
new file mode 100644
index 000000000..d0552bd99
--- /dev/null
+++ b/pkg/sentry/fsimpl/gofer/gofer.go
@@ -0,0 +1,1147 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package gofer provides a filesystem implementation that is backed by a 9p
+// server, interchangably referred to as "gofers" throughout this package.
+//
+// Lock order:
+// regularFileFD/directoryFD.mu
+// filesystem.renameMu
+// dentry.dirMu
+// filesystem.syncMu
+// dentry.metadataMu
+// *** "memmap.Mappable locks" below this point
+// dentry.mapsMu
+// *** "memmap.Mappable locks taken by Translate" below this point
+// dentry.handleMu
+// dentry.dataMu
+//
+// Locking dentry.dirMu in multiple dentries requires holding
+// filesystem.renameMu for writing.
+package gofer
+
+import (
+ "fmt"
+ "strconv"
+ "sync"
+ "sync/atomic"
+ "syscall"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/p9"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/pgalloc"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/unet"
+ "gvisor.dev/gvisor/pkg/usermem"
+)
+
+// FilesystemType implements vfs.FilesystemType.
+type FilesystemType struct{}
+
+// filesystem implements vfs.FilesystemImpl.
+type filesystem struct {
+ vfsfs vfs.Filesystem
+
+ // mfp is used to allocate memory that caches regular file contents. mfp is
+ // immutable.
+ mfp pgalloc.MemoryFileProvider
+
+ // Immutable options.
+ opts filesystemOptions
+
+ // client is the client used by this filesystem. client is immutable.
+ client *p9.Client
+
+ // uid and gid are the effective KUID and KGID of the filesystem's creator,
+ // and are used as the owner and group for files that don't specify one.
+ // uid and gid are immutable.
+ uid auth.KUID
+ gid auth.KGID
+
+ // renameMu serves two purposes:
+ //
+ // - It synchronizes path resolution with renaming initiated by this
+ // client.
+ //
+ // - It is held by path resolution to ensure that reachable dentries remain
+ // valid. A dentry is reachable by path resolution if it has a non-zero
+ // reference count (such that it is usable as vfs.ResolvingPath.Start() or
+ // is reachable from its children), or if it is a child dentry (such that
+ // it is reachable from its parent).
+ renameMu sync.RWMutex
+
+ // cachedDentries contains all dentries with 0 references. (Due to race
+ // conditions, it may also contain dentries with non-zero references.)
+ // cachedDentriesLen is the number of dentries in cachedDentries. These
+ // fields are protected by renameMu.
+ cachedDentries dentryList
+ cachedDentriesLen uint64
+
+ // dentries contains all dentries in this filesystem. specialFileFDs
+ // contains all open specialFileFDs. These fields are protected by syncMu.
+ syncMu sync.Mutex
+ dentries map[*dentry]struct{}
+ specialFileFDs map[*specialFileFD]struct{}
+}
+
+type filesystemOptions struct {
+ // "Standard" 9P options.
+ fd int
+ aname string
+ interop InteropMode // derived from the "cache" mount option
+ msize uint32
+ version string
+
+ // maxCachedDentries is the maximum number of dentries with 0 references
+ // retained by the client.
+ maxCachedDentries uint64
+
+ // If forcePageCache is true, host FDs may not be used for application
+ // memory mappings even if available; instead, the client must perform its
+ // own caching of regular file pages. This is primarily useful for testing.
+ forcePageCache bool
+
+ // If limitHostFDTranslation is true, apply maxFillRange() constraints to
+ // host FD mappings returned by dentry.(memmap.Mappable).Translate(). This
+ // makes memory accounting behavior more consistent between cases where
+ // host FDs are / are not available, but may increase the frequency of
+ // sentry-handled page faults on files for which a host FD is available.
+ limitHostFDTranslation bool
+
+ // If overlayfsStaleRead is true, O_RDONLY host FDs provided by the remote
+ // filesystem may not be coherent with writable host FDs opened later, so
+ // mappings of the former must be replaced by mappings of the latter. This
+ // is usually only the case when the remote filesystem is an overlayfs
+ // mount on Linux < 4.19.
+ overlayfsStaleRead bool
+
+ // If regularFilesUseSpecialFileFD is true, application FDs representing
+ // regular files will use distinct file handles for each FD, in the same
+ // way that application FDs representing "special files" such as sockets
+ // do. Note that this disables client caching and mmap for regular files.
+ regularFilesUseSpecialFileFD bool
+}
+
+// InteropMode controls the client's interaction with other remote filesystem
+// users.
+type InteropMode uint32
+
+const (
+ // InteropModeExclusive is appropriate when the filesystem client is the
+ // only user of the remote filesystem.
+ //
+ // - The client may cache arbitrary filesystem state (file data, metadata,
+ // filesystem structure, etc.).
+ //
+ // - Client changes to filesystem state may be sent to the remote
+ // filesystem asynchronously, except when server permission checks are
+ // necessary.
+ //
+ // - File timestamps are based on client clocks. This ensures that users of
+ // the client observe timestamps that are coherent with their own clocks
+ // and consistent with Linux's semantics. However, since it is not always
+ // possible for clients to set arbitrary atimes and mtimes, and never
+ // possible for clients to set arbitrary ctimes, file timestamp changes are
+ // stored in the client only and never sent to the remote filesystem.
+ InteropModeExclusive InteropMode = iota
+
+ // InteropModeWritethrough is appropriate when there are read-only users of
+ // the remote filesystem that expect to observe changes made by the
+ // filesystem client.
+ //
+ // - The client may cache arbitrary filesystem state.
+ //
+ // - Client changes to filesystem state must be sent to the remote
+ // filesystem synchronously.
+ //
+ // - File timestamps are based on client clocks. As a corollary, access
+ // timestamp changes from other remote filesystem users will not be visible
+ // to the client.
+ InteropModeWritethrough
+
+ // InteropModeShared is appropriate when there are users of the remote
+ // filesystem that may mutate its state other than the client.
+ //
+ // - The client must verify cached filesystem state before using it.
+ //
+ // - Client changes to filesystem state must be sent to the remote
+ // filesystem synchronously.
+ //
+ // - File timestamps are based on server clocks. This is necessary to
+ // ensure that timestamp changes are synchronized between remote filesystem
+ // users.
+ //
+ // Note that the correctness of InteropModeShared depends on the server
+ // correctly implementing 9P fids (i.e. each fid immutably represents a
+ // single filesystem object), even in the presence of remote filesystem
+ // mutations from other users. If this is violated, the behavior of the
+ // client is undefined.
+ InteropModeShared
+)
+
+// GetFilesystem implements vfs.FilesystemType.GetFilesystem.
+func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) {
+ mfp := pgalloc.MemoryFileProviderFromContext(ctx)
+ if mfp == nil {
+ ctx.Warningf("gofer.FilesystemType.GetFilesystem: context does not provide a pgalloc.MemoryFileProvider")
+ return nil, nil, syserror.EINVAL
+ }
+
+ mopts := vfs.GenericParseMountOptions(opts.Data)
+ var fsopts filesystemOptions
+
+ // Check that the transport is "fd".
+ trans, ok := mopts["trans"]
+ if !ok {
+ ctx.Warningf("gofer.FilesystemType.GetFilesystem: transport must be specified as 'trans=fd'")
+ return nil, nil, syserror.EINVAL
+ }
+ delete(mopts, "trans")
+ if trans != "fd" {
+ ctx.Warningf("gofer.FilesystemType.GetFilesystem: unsupported transport: trans=%s", trans)
+ return nil, nil, syserror.EINVAL
+ }
+
+ // Check that read and write FDs are provided and identical.
+ rfdstr, ok := mopts["rfdno"]
+ if !ok {
+ ctx.Warningf("gofer.FilesystemType.GetFilesystem: read FD must be specified as 'rfdno=<file descriptor>")
+ return nil, nil, syserror.EINVAL
+ }
+ delete(mopts, "rfdno")
+ rfd, err := strconv.Atoi(rfdstr)
+ if err != nil {
+ ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid read FD: rfdno=%s", rfdstr)
+ return nil, nil, syserror.EINVAL
+ }
+ wfdstr, ok := mopts["wfdno"]
+ if !ok {
+ ctx.Warningf("gofer.FilesystemType.GetFilesystem: write FD must be specified as 'wfdno=<file descriptor>")
+ return nil, nil, syserror.EINVAL
+ }
+ delete(mopts, "wfdno")
+ wfd, err := strconv.Atoi(wfdstr)
+ if err != nil {
+ ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid write FD: wfdno=%s", wfdstr)
+ return nil, nil, syserror.EINVAL
+ }
+ if rfd != wfd {
+ ctx.Warningf("gofer.FilesystemType.GetFilesystem: read FD (%d) and write FD (%d) must be equal", rfd, wfd)
+ return nil, nil, syserror.EINVAL
+ }
+ fsopts.fd = rfd
+
+ // Get the attach name.
+ fsopts.aname = "/"
+ if aname, ok := mopts["aname"]; ok {
+ delete(mopts, "aname")
+ fsopts.aname = aname
+ }
+
+ // Parse the cache policy. For historical reasons, this defaults to the
+ // least generally-applicable option, InteropModeExclusive.
+ fsopts.interop = InteropModeExclusive
+ if cache, ok := mopts["cache"]; ok {
+ delete(mopts, "cache")
+ switch cache {
+ case "fscache":
+ fsopts.interop = InteropModeExclusive
+ case "fscache_writethrough":
+ fsopts.interop = InteropModeWritethrough
+ case "none":
+ fsopts.regularFilesUseSpecialFileFD = true
+ fallthrough
+ case "remote_revalidating":
+ fsopts.interop = InteropModeShared
+ default:
+ ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid cache policy: cache=%s", cache)
+ return nil, nil, syserror.EINVAL
+ }
+ }
+
+ // Parse the 9P message size.
+ fsopts.msize = 1024 * 1024 // 1M, tested to give good enough performance up to 64M
+ if msizestr, ok := mopts["msize"]; ok {
+ delete(mopts, "msize")
+ msize, err := strconv.ParseUint(msizestr, 10, 32)
+ if err != nil {
+ ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid message size: msize=%s", msizestr)
+ return nil, nil, syserror.EINVAL
+ }
+ fsopts.msize = uint32(msize)
+ }
+
+ // Parse the 9P protocol version.
+ fsopts.version = p9.HighestVersionString()
+ if version, ok := mopts["version"]; ok {
+ delete(mopts, "version")
+ fsopts.version = version
+ }
+
+ // Parse the dentry cache limit.
+ fsopts.maxCachedDentries = 1000
+ if str, ok := mopts["dentry_cache_limit"]; ok {
+ delete(mopts, "dentry_cache_limit")
+ maxCachedDentries, err := strconv.ParseUint(str, 10, 64)
+ if err != nil {
+ ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid dentry cache limit: dentry_cache_limit=%s", str)
+ return nil, nil, syserror.EINVAL
+ }
+ fsopts.maxCachedDentries = maxCachedDentries
+ }
+
+ // Handle simple flags.
+ if _, ok := mopts["force_page_cache"]; ok {
+ delete(mopts, "force_page_cache")
+ fsopts.forcePageCache = true
+ }
+ if _, ok := mopts["limit_host_fd_translation"]; ok {
+ delete(mopts, "limit_host_fd_translation")
+ fsopts.limitHostFDTranslation = true
+ }
+ if _, ok := mopts["overlayfs_stale_read"]; ok {
+ delete(mopts, "overlayfs_stale_read")
+ fsopts.overlayfsStaleRead = true
+ }
+ // fsopts.regularFilesUseSpecialFileFD can only be enabled by specifying
+ // "cache=none".
+
+ // Check for unparsed options.
+ if len(mopts) != 0 {
+ ctx.Warningf("gofer.FilesystemType.GetFilesystem: unknown options: %v", mopts)
+ return nil, nil, syserror.EINVAL
+ }
+
+ // Establish a connection with the server.
+ conn, err := unet.NewSocket(fsopts.fd)
+ if err != nil {
+ return nil, nil, err
+ }
+
+ // Perform version negotiation with the server.
+ ctx.UninterruptibleSleepStart(false)
+ client, err := p9.NewClient(conn, fsopts.msize, fsopts.version)
+ ctx.UninterruptibleSleepFinish(false)
+ if err != nil {
+ conn.Close()
+ return nil, nil, err
+ }
+ // Ownership of conn has been transferred to client.
+
+ // Perform attach to obtain the filesystem root.
+ ctx.UninterruptibleSleepStart(false)
+ attached, err := client.Attach(fsopts.aname)
+ ctx.UninterruptibleSleepFinish(false)
+ if err != nil {
+ client.Close()
+ return nil, nil, err
+ }
+ attachFile := p9file{attached}
+ qid, attrMask, attr, err := attachFile.getAttr(ctx, dentryAttrMask())
+ if err != nil {
+ attachFile.close(ctx)
+ client.Close()
+ return nil, nil, err
+ }
+
+ // Construct the filesystem object.
+ fs := &filesystem{
+ mfp: mfp,
+ opts: fsopts,
+ uid: creds.EffectiveKUID,
+ gid: creds.EffectiveKGID,
+ client: client,
+ dentries: make(map[*dentry]struct{}),
+ specialFileFDs: make(map[*specialFileFD]struct{}),
+ }
+ fs.vfsfs.Init(vfsObj, fs)
+
+ // Construct the root dentry.
+ root, err := fs.newDentry(ctx, attachFile, qid, attrMask, &attr)
+ if err != nil {
+ attachFile.close(ctx)
+ fs.vfsfs.DecRef()
+ return nil, nil, err
+ }
+ // Set the root's reference count to 2. One reference is returned to the
+ // caller, and the other is deliberately leaked to prevent the root from
+ // being "cached" and subsequently evicted. Its resources will still be
+ // cleaned up by fs.Release().
+ root.refs = 2
+
+ return &fs.vfsfs, &root.vfsd, nil
+}
+
+// Release implements vfs.FilesystemImpl.Release.
+func (fs *filesystem) Release() {
+ ctx := context.Background()
+ mf := fs.mfp.MemoryFile()
+
+ fs.syncMu.Lock()
+ for d := range fs.dentries {
+ d.handleMu.Lock()
+ d.dataMu.Lock()
+ if d.handleWritable {
+ // Write dirty cached data to the remote file.
+ if err := fsutil.SyncDirtyAll(ctx, &d.cache, &d.dirty, d.size, fs.mfp.MemoryFile(), d.handle.writeFromBlocksAt); err != nil {
+ log.Warningf("gofer.filesystem.Release: failed to flush dentry: %v", err)
+ }
+ // TODO(jamieliu): Do we need to flushf/fsync d?
+ }
+ // Discard cached pages.
+ d.cache.DropAll(mf)
+ d.dirty.RemoveAll()
+ d.dataMu.Unlock()
+ // Close the host fd if one exists.
+ if d.handle.fd >= 0 {
+ syscall.Close(int(d.handle.fd))
+ d.handle.fd = -1
+ }
+ d.handleMu.Unlock()
+ }
+ // There can't be any specialFileFDs still using fs, since each such
+ // FileDescription would hold a reference on a Mount holding a reference on
+ // fs.
+ fs.syncMu.Unlock()
+
+ // Close the connection to the server. This implicitly clunks all fids.
+ fs.client.Close()
+}
+
+// dentry implements vfs.DentryImpl.
+type dentry struct {
+ vfsd vfs.Dentry
+
+ // refs is the reference count. Each dentry holds a reference on its
+ // parent, even if disowned. refs is accessed using atomic memory
+ // operations.
+ refs int64
+
+ // fs is the owning filesystem. fs is immutable.
+ fs *filesystem
+
+ // We don't support hard links, so each dentry maps 1:1 to an inode.
+
+ // file is the unopened p9.File that backs this dentry. file is immutable.
+ file p9file
+
+ // If deleted is non-zero, the file represented by this dentry has been
+ // deleted. deleted is accessed using atomic memory operations.
+ deleted uint32
+
+ // If cached is true, dentryEntry links dentry into
+ // filesystem.cachedDentries. cached and dentryEntry are protected by
+ // filesystem.renameMu.
+ cached bool
+ dentryEntry
+
+ dirMu sync.Mutex
+
+ // If this dentry represents a directory, and InteropModeShared is not in
+ // effect, negativeChildren is a set of child names in this directory that
+ // are known not to exist. negativeChildren is protected by dirMu.
+ negativeChildren map[string]struct{}
+
+ // If this dentry represents a directory, InteropModeShared is not in
+ // effect, and dirents is not nil, it is a cache of all entries in the
+ // directory, in the order they were returned by the server. dirents is
+ // protected by dirMu.
+ dirents []vfs.Dirent
+
+ // Cached metadata; protected by metadataMu and accessed using atomic
+ // memory operations unless otherwise specified.
+ metadataMu sync.Mutex
+ ino uint64 // immutable
+ mode uint32 // type is immutable, perms are mutable
+ uid uint32 // auth.KUID, but stored as raw uint32 for sync/atomic
+ gid uint32 // auth.KGID, but ...
+ blockSize uint32 // 0 if unknown
+ // Timestamps, all nsecs from the Unix epoch.
+ atime int64
+ mtime int64
+ ctime int64
+ btime int64
+ // File size, protected by both metadataMu and dataMu (i.e. both must be
+ // locked to mutate it).
+ size uint64
+
+ mapsMu sync.Mutex
+
+ // If this dentry represents a regular file, mappings tracks mappings of
+ // the file into memmap.MappingSpaces. mappings is protected by mapsMu.
+ mappings memmap.MappingSet
+
+ // If this dentry represents a regular file or directory:
+ //
+ // - handle is the I/O handle used by all regularFileFDs/directoryFDs
+ // representing this dentry.
+ //
+ // - handleReadable is true if handle is readable.
+ //
+ // - handleWritable is true if handle is writable.
+ //
+ // Invariants:
+ //
+ // - If handleReadable == handleWritable == false, then handle.file == nil
+ // (i.e. there is no open handle). Conversely, if handleReadable ||
+ // handleWritable == true, then handle.file != nil (i.e. there is an open
+ // handle).
+ //
+ // - handleReadable and handleWritable cannot transition from true to false
+ // (i.e. handles may not be downgraded).
+ //
+ // These fields are protected by handleMu.
+ handleMu sync.RWMutex
+ handle handle
+ handleReadable bool
+ handleWritable bool
+
+ dataMu sync.RWMutex
+
+ // If this dentry represents a regular file that is client-cached, cache
+ // maps offsets into the cached file to offsets into
+ // filesystem.mfp.MemoryFile() that store the file's data. cache is
+ // protected by dataMu.
+ cache fsutil.FileRangeSet
+
+ // If this dentry represents a regular file that is client-cached, dirty
+ // tracks dirty segments in cache. dirty is protected by dataMu.
+ dirty fsutil.DirtySet
+
+ // pf implements platform.File for mappings of handle.fd.
+ pf dentryPlatformFile
+
+ // If this dentry represents a symbolic link, InteropModeShared is not in
+ // effect, and haveTarget is true, target is the symlink target. haveTarget
+ // and target are protected by dataMu.
+ haveTarget bool
+ target string
+}
+
+// dentryAttrMask returns a p9.AttrMask enabling all attributes used by the
+// gofer client.
+func dentryAttrMask() p9.AttrMask {
+ return p9.AttrMask{
+ Mode: true,
+ UID: true,
+ GID: true,
+ ATime: true,
+ MTime: true,
+ CTime: true,
+ Size: true,
+ BTime: true,
+ }
+}
+
+// newDentry creates a new dentry representing the given file. The dentry
+// initially has no references, but is not cached; it is the caller's
+// responsibility to set the dentry's reference count and/or call
+// dentry.checkCachingLocked() as appropriate.
+func (fs *filesystem) newDentry(ctx context.Context, file p9file, qid p9.QID, mask p9.AttrMask, attr *p9.Attr) (*dentry, error) {
+ if !mask.Mode {
+ ctx.Warningf("can't create gofer.dentry without file type")
+ return nil, syserror.EIO
+ }
+ if attr.Mode.FileType() == p9.ModeRegular && !mask.Size {
+ ctx.Warningf("can't create regular file gofer.dentry without file size")
+ return nil, syserror.EIO
+ }
+
+ d := &dentry{
+ fs: fs,
+ file: file,
+ ino: qid.Path,
+ mode: uint32(attr.Mode),
+ uid: uint32(fs.uid),
+ gid: uint32(fs.gid),
+ blockSize: usermem.PageSize,
+ handle: handle{
+ fd: -1,
+ },
+ }
+ d.pf.dentry = d
+ if mask.UID {
+ d.uid = uint32(attr.UID)
+ }
+ if mask.GID {
+ d.gid = uint32(attr.GID)
+ }
+ if mask.Size {
+ d.size = attr.Size
+ }
+ if attr.BlockSize != 0 {
+ d.blockSize = uint32(attr.BlockSize)
+ }
+ if mask.ATime {
+ d.atime = dentryTimestampFromP9(attr.ATimeSeconds, attr.ATimeNanoSeconds)
+ }
+ if mask.MTime {
+ d.mtime = dentryTimestampFromP9(attr.MTimeSeconds, attr.MTimeNanoSeconds)
+ }
+ if mask.CTime {
+ d.ctime = dentryTimestampFromP9(attr.CTimeSeconds, attr.CTimeNanoSeconds)
+ }
+ if mask.BTime {
+ d.btime = dentryTimestampFromP9(attr.BTimeSeconds, attr.BTimeNanoSeconds)
+ }
+ d.vfsd.Init(d)
+
+ fs.syncMu.Lock()
+ fs.dentries[d] = struct{}{}
+ fs.syncMu.Unlock()
+ return d, nil
+}
+
+// updateFromP9Attrs is called to update d's metadata after an update from the
+// remote filesystem.
+func (d *dentry) updateFromP9Attrs(mask p9.AttrMask, attr *p9.Attr) {
+ d.metadataMu.Lock()
+ if mask.Mode {
+ if got, want := uint32(attr.Mode.FileType()), d.fileType(); got != want {
+ d.metadataMu.Unlock()
+ panic(fmt.Sprintf("gofer.dentry file type changed from %#o to %#o", want, got))
+ }
+ atomic.StoreUint32(&d.mode, uint32(attr.Mode))
+ }
+ if mask.UID {
+ atomic.StoreUint32(&d.uid, uint32(attr.UID))
+ }
+ if mask.GID {
+ atomic.StoreUint32(&d.gid, uint32(attr.GID))
+ }
+ // There is no P9_GETATTR_* bit for I/O block size.
+ if attr.BlockSize != 0 {
+ atomic.StoreUint32(&d.blockSize, uint32(attr.BlockSize))
+ }
+ if mask.ATime {
+ atomic.StoreInt64(&d.atime, dentryTimestampFromP9(attr.ATimeSeconds, attr.ATimeNanoSeconds))
+ }
+ if mask.MTime {
+ atomic.StoreInt64(&d.mtime, dentryTimestampFromP9(attr.MTimeSeconds, attr.MTimeNanoSeconds))
+ }
+ if mask.CTime {
+ atomic.StoreInt64(&d.ctime, dentryTimestampFromP9(attr.CTimeSeconds, attr.CTimeNanoSeconds))
+ }
+ if mask.BTime {
+ atomic.StoreInt64(&d.btime, dentryTimestampFromP9(attr.BTimeSeconds, attr.BTimeNanoSeconds))
+ }
+ if mask.Size {
+ d.dataMu.Lock()
+ atomic.StoreUint64(&d.size, attr.Size)
+ d.dataMu.Unlock()
+ }
+ d.metadataMu.Unlock()
+}
+
+func (d *dentry) updateFromGetattr(ctx context.Context) error {
+ // Use d.handle.file, which represents a 9P fid that has been opened, in
+ // preference to d.file, which represents a 9P fid that has not. This may
+ // be significantly more efficient in some implementations.
+ var (
+ file p9file
+ handleMuRLocked bool
+ )
+ d.handleMu.RLock()
+ if !d.handle.file.isNil() {
+ file = d.handle.file
+ handleMuRLocked = true
+ } else {
+ file = d.file
+ d.handleMu.RUnlock()
+ }
+ _, attrMask, attr, err := file.getAttr(ctx, dentryAttrMask())
+ if handleMuRLocked {
+ d.handleMu.RUnlock()
+ }
+ if err != nil {
+ return err
+ }
+ d.updateFromP9Attrs(attrMask, &attr)
+ return nil
+}
+
+func (d *dentry) fileType() uint32 {
+ return atomic.LoadUint32(&d.mode) & linux.S_IFMT
+}
+
+func (d *dentry) statTo(stat *linux.Statx) {
+ stat.Mask = linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_NLINK | linux.STATX_UID | linux.STATX_GID | linux.STATX_ATIME | linux.STATX_MTIME | linux.STATX_CTIME | linux.STATX_INO | linux.STATX_SIZE | linux.STATX_BLOCKS | linux.STATX_BTIME
+ stat.Blksize = atomic.LoadUint32(&d.blockSize)
+ stat.Nlink = 1
+ if d.isDir() {
+ stat.Nlink = 2
+ }
+ stat.UID = atomic.LoadUint32(&d.uid)
+ stat.GID = atomic.LoadUint32(&d.gid)
+ stat.Mode = uint16(atomic.LoadUint32(&d.mode))
+ stat.Ino = d.ino
+ stat.Size = atomic.LoadUint64(&d.size)
+ // This is consistent with regularFileFD.Seek(), which treats regular files
+ // as having no holes.
+ stat.Blocks = (stat.Size + 511) / 512
+ stat.Atime = statxTimestampFromDentry(atomic.LoadInt64(&d.atime))
+ stat.Btime = statxTimestampFromDentry(atomic.LoadInt64(&d.btime))
+ stat.Ctime = statxTimestampFromDentry(atomic.LoadInt64(&d.ctime))
+ stat.Mtime = statxTimestampFromDentry(atomic.LoadInt64(&d.mtime))
+ // TODO(jamieliu): device number
+}
+
+func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, stat *linux.Statx, mnt *vfs.Mount) error {
+ if stat.Mask == 0 {
+ return nil
+ }
+ if stat.Mask&^(linux.STATX_MODE|linux.STATX_UID|linux.STATX_GID|linux.STATX_ATIME|linux.STATX_MTIME|linux.STATX_SIZE) != 0 {
+ return syserror.EPERM
+ }
+ if err := vfs.CheckSetStat(creds, stat, uint16(atomic.LoadUint32(&d.mode))&^linux.S_IFMT, auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid))); err != nil {
+ return err
+ }
+ if err := mnt.CheckBeginWrite(); err != nil {
+ return err
+ }
+ defer mnt.EndWrite()
+ setLocalAtime := false
+ setLocalMtime := false
+ if d.fs.opts.interop != InteropModeShared {
+ // Timestamp updates will be handled locally.
+ setLocalAtime = stat.Mask&linux.STATX_ATIME != 0
+ setLocalMtime = stat.Mask&linux.STATX_MTIME != 0
+ stat.Mask &^= linux.STATX_ATIME | linux.STATX_MTIME
+ if !setLocalMtime && (stat.Mask&linux.STATX_SIZE != 0) {
+ // Truncate updates mtime.
+ setLocalMtime = true
+ stat.Mtime.Nsec = linux.UTIME_NOW
+ }
+ }
+ d.metadataMu.Lock()
+ defer d.metadataMu.Unlock()
+ if stat.Mask != 0 {
+ if err := d.file.setAttr(ctx, p9.SetAttrMask{
+ Permissions: stat.Mask&linux.STATX_MODE != 0,
+ UID: stat.Mask&linux.STATX_UID != 0,
+ GID: stat.Mask&linux.STATX_GID != 0,
+ Size: stat.Mask&linux.STATX_SIZE != 0,
+ ATime: stat.Mask&linux.STATX_ATIME != 0,
+ MTime: stat.Mask&linux.STATX_MTIME != 0,
+ ATimeNotSystemTime: stat.Atime.Nsec != linux.UTIME_NOW,
+ MTimeNotSystemTime: stat.Mtime.Nsec != linux.UTIME_NOW,
+ }, p9.SetAttr{
+ Permissions: p9.FileMode(stat.Mode),
+ UID: p9.UID(stat.UID),
+ GID: p9.GID(stat.GID),
+ Size: stat.Size,
+ ATimeSeconds: uint64(stat.Atime.Sec),
+ ATimeNanoSeconds: uint64(stat.Atime.Nsec),
+ MTimeSeconds: uint64(stat.Mtime.Sec),
+ MTimeNanoSeconds: uint64(stat.Mtime.Nsec),
+ }); err != nil {
+ return err
+ }
+ }
+ if d.fs.opts.interop == InteropModeShared {
+ // There's no point to updating d's metadata in this case since it'll
+ // be overwritten by revalidation before the next time it's used
+ // anyway. (InteropModeShared inhibits client caching of regular file
+ // data, so there's no cache to truncate either.)
+ return nil
+ }
+ now, haveNow := nowFromContext(ctx)
+ if !haveNow {
+ ctx.Warningf("gofer.dentry.setStat: current time not available")
+ }
+ if stat.Mask&linux.STATX_MODE != 0 {
+ atomic.StoreUint32(&d.mode, d.fileType()|uint32(stat.Mode))
+ }
+ if stat.Mask&linux.STATX_UID != 0 {
+ atomic.StoreUint32(&d.uid, stat.UID)
+ }
+ if stat.Mask&linux.STATX_GID != 0 {
+ atomic.StoreUint32(&d.gid, stat.GID)
+ }
+ if setLocalAtime {
+ if stat.Atime.Nsec == linux.UTIME_NOW {
+ if haveNow {
+ atomic.StoreInt64(&d.atime, now)
+ }
+ } else {
+ atomic.StoreInt64(&d.atime, dentryTimestampFromStatx(stat.Atime))
+ }
+ }
+ if setLocalMtime {
+ if stat.Mtime.Nsec == linux.UTIME_NOW {
+ if haveNow {
+ atomic.StoreInt64(&d.mtime, now)
+ }
+ } else {
+ atomic.StoreInt64(&d.mtime, dentryTimestampFromStatx(stat.Mtime))
+ }
+ }
+ if haveNow {
+ atomic.StoreInt64(&d.ctime, now)
+ }
+ if stat.Mask&linux.STATX_SIZE != 0 {
+ d.dataMu.Lock()
+ oldSize := d.size
+ d.size = stat.Size
+ // d.dataMu must be unlocked to lock d.mapsMu and invalidate mappings
+ // below. This allows concurrent calls to Read/Translate/etc. These
+ // functions synchronize with truncation by refusing to use cache
+ // contents beyond the new d.size. (We are still holding d.metadataMu,
+ // so we can't race with Write or another truncate.)
+ d.dataMu.Unlock()
+ if d.size < oldSize {
+ oldpgend := pageRoundUp(oldSize)
+ newpgend := pageRoundUp(d.size)
+ if oldpgend != newpgend {
+ d.mapsMu.Lock()
+ d.mappings.Invalidate(memmap.MappableRange{newpgend, oldpgend}, memmap.InvalidateOpts{
+ // Compare Linux's mm/truncate.c:truncate_setsize() =>
+ // truncate_pagecache() =>
+ // mm/memory.c:unmap_mapping_range(evencows=1).
+ InvalidatePrivate: true,
+ })
+ d.mapsMu.Unlock()
+ }
+ // We are now guaranteed that there are no translations of
+ // truncated pages, and can remove them from the cache. Since
+ // truncated pages have been removed from the remote file, they
+ // should be dropped without being written back.
+ d.dataMu.Lock()
+ d.cache.Truncate(d.size, d.fs.mfp.MemoryFile())
+ d.dirty.KeepClean(memmap.MappableRange{d.size, oldpgend})
+ d.dataMu.Unlock()
+ }
+ }
+ return nil
+}
+
+func (d *dentry) checkPermissions(creds *auth.Credentials, ats vfs.AccessTypes, isDir bool) error {
+ return vfs.GenericCheckPermissions(creds, ats, isDir, uint16(atomic.LoadUint32(&d.mode))&0777, auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid)))
+}
+
+// IncRef implements vfs.DentryImpl.IncRef.
+func (d *dentry) IncRef() {
+ // d.refs may be 0 if d.fs.renameMu is locked, which serializes against
+ // d.checkCachingLocked().
+ atomic.AddInt64(&d.refs, 1)
+}
+
+// TryIncRef implements vfs.DentryImpl.TryIncRef.
+func (d *dentry) TryIncRef() bool {
+ for {
+ refs := atomic.LoadInt64(&d.refs)
+ if refs == 0 {
+ return false
+ }
+ if atomic.CompareAndSwapInt64(&d.refs, refs, refs+1) {
+ return true
+ }
+ }
+}
+
+// DecRef implements vfs.DentryImpl.DecRef.
+func (d *dentry) DecRef() {
+ if refs := atomic.AddInt64(&d.refs, -1); refs == 0 {
+ d.fs.renameMu.Lock()
+ d.checkCachingLocked()
+ d.fs.renameMu.Unlock()
+ } else if refs < 0 {
+ panic("gofer.dentry.DecRef() called without holding a reference")
+ }
+}
+
+// checkCachingLocked should be called after d's reference count becomes 0 or it
+// becomes disowned.
+//
+// Preconditions: d.fs.renameMu must be locked for writing.
+func (d *dentry) checkCachingLocked() {
+ // Dentries with a non-zero reference count must be retained. (The only way
+ // to obtain a reference on a dentry with zero references is via path
+ // resolution, which requires renameMu, so if d.refs is zero then it will
+ // remain zero while we hold renameMu for writing.)
+ if atomic.LoadInt64(&d.refs) != 0 {
+ if d.cached {
+ d.fs.cachedDentries.Remove(d)
+ d.fs.cachedDentriesLen--
+ d.cached = false
+ }
+ return
+ }
+ // Non-child dentries with zero references are no longer reachable by path
+ // resolution and should be dropped immediately.
+ if d.vfsd.Parent() == nil || d.vfsd.IsDisowned() {
+ if d.cached {
+ d.fs.cachedDentries.Remove(d)
+ d.fs.cachedDentriesLen--
+ d.cached = false
+ }
+ d.destroyLocked()
+ return
+ }
+ // If d is already cached, just move it to the front of the LRU.
+ if d.cached {
+ d.fs.cachedDentries.Remove(d)
+ d.fs.cachedDentries.PushFront(d)
+ return
+ }
+ // Cache the dentry, then evict the least recently used cached dentry if
+ // the cache becomes over-full.
+ d.fs.cachedDentries.PushFront(d)
+ d.fs.cachedDentriesLen++
+ d.cached = true
+ if d.fs.cachedDentriesLen > d.fs.opts.maxCachedDentries {
+ victim := d.fs.cachedDentries.Back()
+ d.fs.cachedDentries.Remove(victim)
+ d.fs.cachedDentriesLen--
+ victim.cached = false
+ // victim.refs may have become non-zero from an earlier path
+ // resolution since it was inserted into fs.cachedDentries; see
+ // dentry.incRefLocked(). Either way, we brought
+ // fs.cachedDentriesLen back down to fs.opts.maxCachedDentries, so
+ // we don't loop.
+ if atomic.LoadInt64(&victim.refs) == 0 {
+ if victimParentVFSD := victim.vfsd.Parent(); victimParentVFSD != nil {
+ victimParent := victimParentVFSD.Impl().(*dentry)
+ victimParent.dirMu.Lock()
+ if !victim.vfsd.IsDisowned() {
+ // victim can't be a mount point (in any mount
+ // namespace), since VFS holds references on mount
+ // points.
+ d.fs.vfsfs.VirtualFilesystem().ForceDeleteDentry(&victim.vfsd)
+ // We're only deleting the dentry, not the file it
+ // represents, so we don't need to update
+ // victimParent.dirents etc.
+ }
+ victimParent.dirMu.Unlock()
+ }
+ victim.destroyLocked()
+ }
+ }
+}
+
+// Preconditions: d.fs.renameMu must be locked for writing. d.refs == 0. d is
+// not a child dentry.
+func (d *dentry) destroyLocked() {
+ ctx := context.Background()
+ d.handleMu.Lock()
+ if !d.handle.file.isNil() {
+ mf := d.fs.mfp.MemoryFile()
+ d.dataMu.Lock()
+ // Write dirty pages back to the remote filesystem.
+ if d.handleWritable {
+ if err := fsutil.SyncDirtyAll(ctx, &d.cache, &d.dirty, d.size, mf, d.handle.writeFromBlocksAt); err != nil {
+ log.Warningf("gofer.dentry.DecRef: failed to write dirty data back: %v", err)
+ }
+ }
+ // Discard cached data.
+ d.cache.DropAll(mf)
+ d.dirty.RemoveAll()
+ d.dataMu.Unlock()
+ // Clunk open fids and close open host FDs.
+ d.handle.close(ctx)
+ }
+ d.handleMu.Unlock()
+ d.file.close(ctx)
+ // Remove d from the set of all dentries.
+ d.fs.syncMu.Lock()
+ delete(d.fs.dentries, d)
+ d.fs.syncMu.Unlock()
+ // Drop the reference held by d on its parent.
+ if parentVFSD := d.vfsd.Parent(); parentVFSD != nil {
+ parent := parentVFSD.Impl().(*dentry)
+ // This is parent.DecRef() without recursive locking of d.fs.renameMu.
+ if refs := atomic.AddInt64(&parent.refs, -1); refs == 0 {
+ parent.checkCachingLocked()
+ } else if refs < 0 {
+ panic("gofer.dentry.DecRef() called without holding a reference")
+ }
+ }
+}
+
+func (d *dentry) isDeleted() bool {
+ return atomic.LoadUint32(&d.deleted) != 0
+}
+
+func (d *dentry) setDeleted() {
+ atomic.StoreUint32(&d.deleted, 1)
+}
+
+func (d *dentry) listxattr(ctx context.Context) ([]string, error) {
+ return nil, syserror.ENOTSUP
+}
+
+func (d *dentry) getxattr(ctx context.Context, name string) (string, error) {
+ // TODO(jamieliu): add vfs.GetxattrOptions.Size
+ return d.file.getXattr(ctx, name, linux.XATTR_SIZE_MAX)
+}
+
+func (d *dentry) setxattr(ctx context.Context, opts *vfs.SetxattrOptions) error {
+ return d.file.setXattr(ctx, opts.Name, opts.Value, opts.Flags)
+}
+
+func (d *dentry) removexattr(ctx context.Context, name string) error {
+ return syserror.ENOTSUP
+}
+
+// Preconditions: d.isRegularFile() || d.isDirectory().
+func (d *dentry) ensureSharedHandle(ctx context.Context, read, write, trunc bool) error {
+ // O_TRUNC unconditionally requires us to obtain a new handle (opened with
+ // O_TRUNC).
+ if !trunc {
+ d.handleMu.RLock()
+ if (!read || d.handleReadable) && (!write || d.handleWritable) {
+ // The current handle is sufficient.
+ d.handleMu.RUnlock()
+ return nil
+ }
+ d.handleMu.RUnlock()
+ }
+
+ haveOldFD := false
+ d.handleMu.Lock()
+ if (read && !d.handleReadable) || (write && !d.handleWritable) || trunc {
+ // Get a new handle.
+ wantReadable := d.handleReadable || read
+ wantWritable := d.handleWritable || write
+ h, err := openHandle(ctx, d.file, wantReadable, wantWritable, trunc)
+ if err != nil {
+ d.handleMu.Unlock()
+ return err
+ }
+ if !d.handle.file.isNil() {
+ // Check that old and new handles are compatible: If the old handle
+ // includes a host file descriptor but the new one does not, or
+ // vice versa, old and new memory mappings may be incoherent.
+ haveOldFD = d.handle.fd >= 0
+ haveNewFD := h.fd >= 0
+ if haveOldFD != haveNewFD {
+ d.handleMu.Unlock()
+ ctx.Warningf("gofer.dentry.ensureSharedHandle: can't change host FD availability from %v to %v across dentry handle upgrade", haveOldFD, haveNewFD)
+ h.close(ctx)
+ return syserror.EIO
+ }
+ if haveOldFD {
+ // We may have raced with callers of d.pf.FD() that are now
+ // using the old file descriptor, preventing us from safely
+ // closing it. We could handle this by invalidating existing
+ // memmap.Translations, but this is expensive. Instead, use
+ // dup2() to make the old file descriptor refer to the new file
+ // description, then close the new file descriptor (which is no
+ // longer needed). Racing callers may use the old or new file
+ // description, but this doesn't matter since they refer to the
+ // same file (unless d.fs.opts.overlayfsStaleRead is true,
+ // which we handle separately).
+ if err := syscall.Dup2(int(h.fd), int(d.handle.fd)); err != nil {
+ d.handleMu.Unlock()
+ ctx.Warningf("gofer.dentry.ensureSharedHandle: failed to dup fd %d to fd %d: %v", h.fd, d.handle.fd, err)
+ h.close(ctx)
+ return err
+ }
+ syscall.Close(int(h.fd))
+ h.fd = d.handle.fd
+ if d.fs.opts.overlayfsStaleRead {
+ // Replace sentry mappings of the old FD with mappings of
+ // the new FD, since the two are not necessarily coherent.
+ if err := d.pf.hostFileMapper.RegenerateMappings(int(h.fd)); err != nil {
+ d.handleMu.Unlock()
+ ctx.Warningf("gofer.dentry.ensureSharedHandle: failed to replace sentry mappings of old FD with mappings of new FD: %v", err)
+ h.close(ctx)
+ return err
+ }
+ }
+ // Clunk the old fid before making the new handle visible (by
+ // unlocking d.handleMu).
+ d.handle.file.close(ctx)
+ }
+ }
+ // Switch to the new handle.
+ d.handle = h
+ d.handleReadable = wantReadable
+ d.handleWritable = wantWritable
+ }
+ d.handleMu.Unlock()
+
+ if d.fs.opts.overlayfsStaleRead && haveOldFD {
+ // Invalidate application mappings that may be using the old FD; they
+ // will be replaced with mappings using the new FD after future calls
+ // to d.Translate(). This requires holding d.mapsMu, which precedes
+ // d.handleMu in the lock order.
+ d.mapsMu.Lock()
+ d.mappings.InvalidateAll(memmap.InvalidateOpts{})
+ d.mapsMu.Unlock()
+ }
+
+ return nil
+}
+
+// fileDescription is embedded by gofer implementations of
+// vfs.FileDescriptionImpl.
+type fileDescription struct {
+ vfsfd vfs.FileDescription
+ vfs.FileDescriptionDefaultImpl
+}
+
+func (fd *fileDescription) filesystem() *filesystem {
+ return fd.vfsfd.Mount().Filesystem().Impl().(*filesystem)
+}
+
+func (fd *fileDescription) dentry() *dentry {
+ return fd.vfsfd.Dentry().Impl().(*dentry)
+}
+
+// Stat implements vfs.FileDescriptionImpl.Stat.
+func (fd *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) {
+ d := fd.dentry()
+ if d.fs.opts.interop == InteropModeShared && opts.Mask&(linux.STATX_MODE|linux.STATX_UID|linux.STATX_GID|linux.STATX_ATIME|linux.STATX_MTIME|linux.STATX_CTIME|linux.STATX_SIZE|linux.STATX_BLOCKS|linux.STATX_BTIME) != 0 && opts.Sync != linux.AT_STATX_DONT_SYNC {
+ // TODO(jamieliu): Use specialFileFD.handle.file for the getattr if
+ // available?
+ if err := d.updateFromGetattr(ctx); err != nil {
+ return linux.Statx{}, err
+ }
+ }
+ var stat linux.Statx
+ d.statTo(&stat)
+ return stat, nil
+}
+
+// SetStat implements vfs.FileDescriptionImpl.SetStat.
+func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
+ return fd.dentry().setStat(ctx, auth.CredentialsFromContext(ctx), &opts.Stat, fd.vfsfd.Mount())
+}
+
+// Listxattr implements vfs.FileDescriptionImpl.Listxattr.
+func (fd *fileDescription) Listxattr(ctx context.Context) ([]string, error) {
+ return fd.dentry().listxattr(ctx)
+}
+
+// Getxattr implements vfs.FileDescriptionImpl.Getxattr.
+func (fd *fileDescription) Getxattr(ctx context.Context, name string) (string, error) {
+ return fd.dentry().getxattr(ctx, name)
+}
+
+// Setxattr implements vfs.FileDescriptionImpl.Setxattr.
+func (fd *fileDescription) Setxattr(ctx context.Context, opts vfs.SetxattrOptions) error {
+ return fd.dentry().setxattr(ctx, &opts)
+}
+
+// Removexattr implements vfs.FileDescriptionImpl.Removexattr.
+func (fd *fileDescription) Removexattr(ctx context.Context, name string) error {
+ return fd.dentry().removexattr(ctx, name)
+}
diff --git a/pkg/sentry/fsimpl/gofer/handle.go b/pkg/sentry/fsimpl/gofer/handle.go
new file mode 100644
index 000000000..cfe66f797
--- /dev/null
+++ b/pkg/sentry/fsimpl/gofer/handle.go
@@ -0,0 +1,135 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package gofer
+
+import (
+ "syscall"
+
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/p9"
+ "gvisor.dev/gvisor/pkg/safemem"
+)
+
+// handle represents a remote "open file descriptor", consisting of an opened
+// fid (p9.File) and optionally a host file descriptor.
+type handle struct {
+ file p9file
+ fd int32 // -1 if unavailable
+}
+
+// Preconditions: read || write.
+func openHandle(ctx context.Context, file p9file, read, write, trunc bool) (handle, error) {
+ _, newfile, err := file.walk(ctx, nil)
+ if err != nil {
+ return handle{fd: -1}, err
+ }
+ var flags p9.OpenFlags
+ switch {
+ case read && !write:
+ flags = p9.ReadOnly
+ case !read && write:
+ flags = p9.WriteOnly
+ case read && write:
+ flags = p9.ReadWrite
+ }
+ if trunc {
+ flags |= p9.OpenTruncate
+ }
+ fdobj, _, _, err := newfile.open(ctx, flags)
+ if err != nil {
+ newfile.close(ctx)
+ return handle{fd: -1}, err
+ }
+ fd := int32(-1)
+ if fdobj != nil {
+ fd = int32(fdobj.Release())
+ }
+ return handle{
+ file: newfile,
+ fd: fd,
+ }, nil
+}
+
+func (h *handle) close(ctx context.Context) {
+ h.file.close(ctx)
+ h.file = p9file{}
+ if h.fd >= 0 {
+ syscall.Close(int(h.fd))
+ h.fd = -1
+ }
+}
+
+func (h *handle) readToBlocksAt(ctx context.Context, dsts safemem.BlockSeq, offset uint64) (uint64, error) {
+ if dsts.IsEmpty() {
+ return 0, nil
+ }
+ if h.fd >= 0 {
+ ctx.UninterruptibleSleepStart(false)
+ n, err := hostPreadv(h.fd, dsts, int64(offset))
+ ctx.UninterruptibleSleepFinish(false)
+ return n, err
+ }
+ if dsts.NumBlocks() == 1 && !dsts.Head().NeedSafecopy() {
+ n, err := h.file.readAt(ctx, dsts.Head().ToSlice(), offset)
+ return uint64(n), err
+ }
+ // Buffer the read since p9.File.ReadAt() takes []byte.
+ buf := make([]byte, dsts.NumBytes())
+ n, err := h.file.readAt(ctx, buf, offset)
+ if n == 0 {
+ return 0, err
+ }
+ if cp, cperr := safemem.CopySeq(dsts, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf[:n]))); cperr != nil {
+ return cp, cperr
+ }
+ return uint64(n), err
+}
+
+func (h *handle) writeFromBlocksAt(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error) {
+ if srcs.IsEmpty() {
+ return 0, nil
+ }
+ if h.fd >= 0 {
+ ctx.UninterruptibleSleepStart(false)
+ n, err := hostPwritev(h.fd, srcs, int64(offset))
+ ctx.UninterruptibleSleepFinish(false)
+ return n, err
+ }
+ if srcs.NumBlocks() == 1 && !srcs.Head().NeedSafecopy() {
+ n, err := h.file.writeAt(ctx, srcs.Head().ToSlice(), offset)
+ return uint64(n), err
+ }
+ // Buffer the write since p9.File.WriteAt() takes []byte.
+ buf := make([]byte, srcs.NumBytes())
+ cp, cperr := safemem.CopySeq(safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf)), srcs)
+ if cp == 0 {
+ return 0, cperr
+ }
+ n, err := h.file.writeAt(ctx, buf[:cp], offset)
+ if err != nil {
+ return uint64(n), err
+ }
+ return cp, cperr
+}
+
+func (h *handle) sync(ctx context.Context) error {
+ if h.fd >= 0 {
+ ctx.UninterruptibleSleepStart(false)
+ err := syscall.Fsync(int(h.fd))
+ ctx.UninterruptibleSleepFinish(false)
+ return err
+ }
+ return h.file.fsync(ctx)
+}
diff --git a/pkg/sentry/fsimpl/gofer/handle_unsafe.go b/pkg/sentry/fsimpl/gofer/handle_unsafe.go
new file mode 100644
index 000000000..19560ab26
--- /dev/null
+++ b/pkg/sentry/fsimpl/gofer/handle_unsafe.go
@@ -0,0 +1,66 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package gofer
+
+import (
+ "syscall"
+ "unsafe"
+
+ "gvisor.dev/gvisor/pkg/safemem"
+)
+
+// Preconditions: !dsts.IsEmpty().
+func hostPreadv(fd int32, dsts safemem.BlockSeq, off int64) (uint64, error) {
+ // No buffering is necessary regardless of safecopy; host syscalls will
+ // return EFAULT if appropriate, instead of raising SIGBUS.
+ if dsts.NumBlocks() == 1 {
+ // Use pread() instead of preadv() to avoid iovec allocation and
+ // copying.
+ dst := dsts.Head()
+ n, _, e := syscall.Syscall6(syscall.SYS_PREAD64, uintptr(fd), dst.Addr(), uintptr(dst.Len()), uintptr(off), 0, 0)
+ if e != 0 {
+ return 0, e
+ }
+ return uint64(n), nil
+ }
+ iovs := safemem.IovecsFromBlockSeq(dsts)
+ n, _, e := syscall.Syscall6(syscall.SYS_PREADV, uintptr(fd), uintptr((unsafe.Pointer)(&iovs[0])), uintptr(len(iovs)), uintptr(off), 0, 0)
+ if e != 0 {
+ return 0, e
+ }
+ return uint64(n), nil
+}
+
+// Preconditions: !srcs.IsEmpty().
+func hostPwritev(fd int32, srcs safemem.BlockSeq, off int64) (uint64, error) {
+ // No buffering is necessary regardless of safecopy; host syscalls will
+ // return EFAULT if appropriate, instead of raising SIGBUS.
+ if srcs.NumBlocks() == 1 {
+ // Use pwrite() instead of pwritev() to avoid iovec allocation and
+ // copying.
+ src := srcs.Head()
+ n, _, e := syscall.Syscall6(syscall.SYS_PWRITE64, uintptr(fd), src.Addr(), uintptr(src.Len()), uintptr(off), 0, 0)
+ if e != 0 {
+ return 0, e
+ }
+ return uint64(n), nil
+ }
+ iovs := safemem.IovecsFromBlockSeq(srcs)
+ n, _, e := syscall.Syscall6(syscall.SYS_PWRITEV, uintptr(fd), uintptr((unsafe.Pointer)(&iovs[0])), uintptr(len(iovs)), uintptr(off), 0, 0)
+ if e != 0 {
+ return 0, e
+ }
+ return uint64(n), nil
+}
diff --git a/pkg/sentry/fsimpl/gofer/p9file.go b/pkg/sentry/fsimpl/gofer/p9file.go
new file mode 100644
index 000000000..755ac2985
--- /dev/null
+++ b/pkg/sentry/fsimpl/gofer/p9file.go
@@ -0,0 +1,219 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package gofer
+
+import (
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/fd"
+ "gvisor.dev/gvisor/pkg/p9"
+ "gvisor.dev/gvisor/pkg/syserror"
+)
+
+// p9file is a wrapper around p9.File that provides methods that are
+// Context-aware.
+type p9file struct {
+ file p9.File
+}
+
+func (f p9file) isNil() bool {
+ return f.file == nil
+}
+
+func (f p9file) walk(ctx context.Context, names []string) ([]p9.QID, p9file, error) {
+ ctx.UninterruptibleSleepStart(false)
+ qids, newfile, err := f.file.Walk(names)
+ ctx.UninterruptibleSleepFinish(false)
+ return qids, p9file{newfile}, err
+}
+
+func (f p9file) walkGetAttr(ctx context.Context, names []string) ([]p9.QID, p9file, p9.AttrMask, p9.Attr, error) {
+ ctx.UninterruptibleSleepStart(false)
+ qids, newfile, attrMask, attr, err := f.file.WalkGetAttr(names)
+ ctx.UninterruptibleSleepFinish(false)
+ return qids, p9file{newfile}, attrMask, attr, err
+}
+
+// walkGetAttrOne is a wrapper around p9.File.WalkGetAttr that takes a single
+// path component and returns a single qid.
+func (f p9file) walkGetAttrOne(ctx context.Context, name string) (p9.QID, p9file, p9.AttrMask, p9.Attr, error) {
+ ctx.UninterruptibleSleepStart(false)
+ qids, newfile, attrMask, attr, err := f.file.WalkGetAttr([]string{name})
+ ctx.UninterruptibleSleepFinish(false)
+ if err != nil {
+ return p9.QID{}, p9file{}, p9.AttrMask{}, p9.Attr{}, err
+ }
+ if len(qids) != 1 {
+ ctx.Warningf("p9.File.WalkGetAttr returned %d qids (%v), wanted 1", len(qids), qids)
+ if newfile != nil {
+ p9file{newfile}.close(ctx)
+ }
+ return p9.QID{}, p9file{}, p9.AttrMask{}, p9.Attr{}, syserror.EIO
+ }
+ return qids[0], p9file{newfile}, attrMask, attr, nil
+}
+
+func (f p9file) statFS(ctx context.Context) (p9.FSStat, error) {
+ ctx.UninterruptibleSleepStart(false)
+ fsstat, err := f.file.StatFS()
+ ctx.UninterruptibleSleepFinish(false)
+ return fsstat, err
+}
+
+func (f p9file) getAttr(ctx context.Context, req p9.AttrMask) (p9.QID, p9.AttrMask, p9.Attr, error) {
+ ctx.UninterruptibleSleepStart(false)
+ qid, attrMask, attr, err := f.file.GetAttr(req)
+ ctx.UninterruptibleSleepFinish(false)
+ return qid, attrMask, attr, err
+}
+
+func (f p9file) setAttr(ctx context.Context, valid p9.SetAttrMask, attr p9.SetAttr) error {
+ ctx.UninterruptibleSleepStart(false)
+ err := f.file.SetAttr(valid, attr)
+ ctx.UninterruptibleSleepFinish(false)
+ return err
+}
+
+func (f p9file) getXattr(ctx context.Context, name string, size uint64) (string, error) {
+ ctx.UninterruptibleSleepStart(false)
+ val, err := f.file.GetXattr(name, size)
+ ctx.UninterruptibleSleepFinish(false)
+ return val, err
+}
+
+func (f p9file) setXattr(ctx context.Context, name, value string, flags uint32) error {
+ ctx.UninterruptibleSleepStart(false)
+ err := f.file.SetXattr(name, value, flags)
+ ctx.UninterruptibleSleepFinish(false)
+ return err
+}
+
+func (f p9file) allocate(ctx context.Context, mode p9.AllocateMode, offset, length uint64) error {
+ ctx.UninterruptibleSleepStart(false)
+ err := f.file.Allocate(mode, offset, length)
+ ctx.UninterruptibleSleepFinish(false)
+ return err
+}
+
+func (f p9file) close(ctx context.Context) error {
+ ctx.UninterruptibleSleepStart(false)
+ err := f.file.Close()
+ ctx.UninterruptibleSleepFinish(false)
+ return err
+}
+
+func (f p9file) open(ctx context.Context, flags p9.OpenFlags) (*fd.FD, p9.QID, uint32, error) {
+ ctx.UninterruptibleSleepStart(false)
+ fdobj, qid, iounit, err := f.file.Open(flags)
+ ctx.UninterruptibleSleepFinish(false)
+ return fdobj, qid, iounit, err
+}
+
+func (f p9file) readAt(ctx context.Context, p []byte, offset uint64) (int, error) {
+ ctx.UninterruptibleSleepStart(false)
+ n, err := f.file.ReadAt(p, offset)
+ ctx.UninterruptibleSleepFinish(false)
+ return n, err
+}
+
+func (f p9file) writeAt(ctx context.Context, p []byte, offset uint64) (int, error) {
+ ctx.UninterruptibleSleepStart(false)
+ n, err := f.file.WriteAt(p, offset)
+ ctx.UninterruptibleSleepFinish(false)
+ return n, err
+}
+
+func (f p9file) fsync(ctx context.Context) error {
+ ctx.UninterruptibleSleepStart(false)
+ err := f.file.FSync()
+ ctx.UninterruptibleSleepFinish(false)
+ return err
+}
+
+func (f p9file) create(ctx context.Context, name string, flags p9.OpenFlags, permissions p9.FileMode, uid p9.UID, gid p9.GID) (*fd.FD, p9file, p9.QID, uint32, error) {
+ ctx.UninterruptibleSleepStart(false)
+ fdobj, newfile, qid, iounit, err := f.file.Create(name, flags, permissions, uid, gid)
+ ctx.UninterruptibleSleepFinish(false)
+ return fdobj, p9file{newfile}, qid, iounit, err
+}
+
+func (f p9file) mkdir(ctx context.Context, name string, permissions p9.FileMode, uid p9.UID, gid p9.GID) (p9.QID, error) {
+ ctx.UninterruptibleSleepStart(false)
+ qid, err := f.file.Mkdir(name, permissions, uid, gid)
+ ctx.UninterruptibleSleepFinish(false)
+ return qid, err
+}
+
+func (f p9file) symlink(ctx context.Context, oldName string, newName string, uid p9.UID, gid p9.GID) (p9.QID, error) {
+ ctx.UninterruptibleSleepStart(false)
+ qid, err := f.file.Symlink(oldName, newName, uid, gid)
+ ctx.UninterruptibleSleepFinish(false)
+ return qid, err
+}
+
+func (f p9file) link(ctx context.Context, target p9file, newName string) error {
+ ctx.UninterruptibleSleepStart(false)
+ err := f.file.Link(target.file, newName)
+ ctx.UninterruptibleSleepFinish(false)
+ return err
+}
+
+func (f p9file) mknod(ctx context.Context, name string, mode p9.FileMode, major uint32, minor uint32, uid p9.UID, gid p9.GID) (p9.QID, error) {
+ ctx.UninterruptibleSleepStart(false)
+ qid, err := f.file.Mknod(name, mode, major, minor, uid, gid)
+ ctx.UninterruptibleSleepFinish(false)
+ return qid, err
+}
+
+func (f p9file) rename(ctx context.Context, newDir p9file, newName string) error {
+ ctx.UninterruptibleSleepStart(false)
+ err := f.file.Rename(newDir.file, newName)
+ ctx.UninterruptibleSleepFinish(false)
+ return err
+}
+
+func (f p9file) unlinkAt(ctx context.Context, name string, flags uint32) error {
+ ctx.UninterruptibleSleepStart(false)
+ err := f.file.UnlinkAt(name, flags)
+ ctx.UninterruptibleSleepFinish(false)
+ return err
+}
+
+func (f p9file) readdir(ctx context.Context, offset uint64, count uint32) ([]p9.Dirent, error) {
+ ctx.UninterruptibleSleepStart(false)
+ dirents, err := f.file.Readdir(offset, count)
+ ctx.UninterruptibleSleepFinish(false)
+ return dirents, err
+}
+
+func (f p9file) readlink(ctx context.Context) (string, error) {
+ ctx.UninterruptibleSleepStart(false)
+ target, err := f.file.Readlink()
+ ctx.UninterruptibleSleepFinish(false)
+ return target, err
+}
+
+func (f p9file) flush(ctx context.Context) error {
+ ctx.UninterruptibleSleepStart(false)
+ err := f.file.Flush()
+ ctx.UninterruptibleSleepFinish(false)
+ return err
+}
+
+func (f p9file) connect(ctx context.Context, flags p9.ConnectFlags) (*fd.FD, error) {
+ ctx.UninterruptibleSleepStart(false)
+ fdobj, err := f.file.Connect(flags)
+ ctx.UninterruptibleSleepFinish(false)
+ return fdobj, err
+}
diff --git a/pkg/sentry/fsimpl/gofer/pagemath.go b/pkg/sentry/fsimpl/gofer/pagemath.go
new file mode 100644
index 000000000..847cb0784
--- /dev/null
+++ b/pkg/sentry/fsimpl/gofer/pagemath.go
@@ -0,0 +1,31 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package gofer
+
+import (
+ "gvisor.dev/gvisor/pkg/usermem"
+)
+
+// This are equivalent to usermem.Addr.RoundDown/Up, but without the
+// potentially truncating conversion to usermem.Addr. This is necessary because
+// there is no way to define generic "PageRoundDown/Up" functions in Go.
+
+func pageRoundDown(x uint64) uint64 {
+ return x &^ (usermem.PageSize - 1)
+}
+
+func pageRoundUp(x uint64) uint64 {
+ return pageRoundDown(x + usermem.PageSize - 1)
+}
diff --git a/pkg/sentry/fsimpl/gofer/regular_file.go b/pkg/sentry/fsimpl/gofer/regular_file.go
new file mode 100644
index 000000000..8e11e06b3
--- /dev/null
+++ b/pkg/sentry/fsimpl/gofer/regular_file.go
@@ -0,0 +1,860 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package gofer
+
+import (
+ "fmt"
+ "io"
+ "math"
+ "sync"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/safemem"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/pgalloc"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/usermem"
+)
+
+func (d *dentry) isRegularFile() bool {
+ return d.fileType() == linux.S_IFREG
+}
+
+type regularFileFD struct {
+ fileDescription
+
+ // off is the file offset. off is protected by mu.
+ mu sync.Mutex
+ off int64
+}
+
+// Release implements vfs.FileDescriptionImpl.Release.
+func (fd *regularFileFD) Release() {
+}
+
+// OnClose implements vfs.FileDescriptionImpl.OnClose.
+func (fd *regularFileFD) OnClose(ctx context.Context) error {
+ if !fd.vfsfd.IsWritable() {
+ return nil
+ }
+ // Skip flushing if writes may be buffered by the client, since (as with
+ // the VFS1 client) we don't flush buffered writes on close anyway.
+ d := fd.dentry()
+ if d.fs.opts.interop == InteropModeExclusive {
+ return nil
+ }
+ d.handleMu.RLock()
+ defer d.handleMu.RUnlock()
+ return d.handle.file.flush(ctx)
+}
+
+// PRead implements vfs.FileDescriptionImpl.PRead.
+func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
+ if offset < 0 {
+ return 0, syserror.EINVAL
+ }
+ if opts.Flags != 0 {
+ return 0, syserror.EOPNOTSUPP
+ }
+
+ // Check for reading at EOF before calling into MM (but not under
+ // InteropModeShared, which makes d.size unreliable).
+ d := fd.dentry()
+ if d.fs.opts.interop != InteropModeShared && uint64(offset) >= atomic.LoadUint64(&d.size) {
+ return 0, io.EOF
+ }
+
+ if fd.vfsfd.StatusFlags()&linux.O_DIRECT != 0 {
+ // Lock d.metadataMu for the rest of the read to prevent d.size from
+ // changing.
+ d.metadataMu.Lock()
+ defer d.metadataMu.Unlock()
+ // Write dirty cached pages that will be touched by the read back to
+ // the remote file.
+ if err := d.writeback(ctx, offset, dst.NumBytes()); err != nil {
+ return 0, err
+ }
+ }
+
+ rw := getDentryReadWriter(ctx, d, offset)
+ if fd.vfsfd.StatusFlags()&linux.O_DIRECT != 0 {
+ // Require the read to go to the remote file.
+ rw.direct = true
+ }
+ n, err := dst.CopyOutFrom(ctx, rw)
+ putDentryReadWriter(rw)
+ if d.fs.opts.interop != InteropModeShared {
+ // Compare Linux's mm/filemap.c:do_generic_file_read() => file_accessed().
+ d.touchAtime(ctx, fd.vfsfd.Mount())
+ }
+ return n, err
+}
+
+// Read implements vfs.FileDescriptionImpl.Read.
+func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
+ fd.mu.Lock()
+ n, err := fd.PRead(ctx, dst, fd.off, opts)
+ fd.off += n
+ fd.mu.Unlock()
+ return n, err
+}
+
+// PWrite implements vfs.FileDescriptionImpl.PWrite.
+func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
+ if offset < 0 {
+ return 0, syserror.EINVAL
+ }
+ if opts.Flags != 0 {
+ return 0, syserror.EOPNOTSUPP
+ }
+
+ d := fd.dentry()
+ d.metadataMu.Lock()
+ defer d.metadataMu.Unlock()
+ if d.fs.opts.interop != InteropModeShared {
+ // Compare Linux's mm/filemap.c:__generic_file_write_iter() =>
+ // file_update_time(). This is d.touchCMtime(), but without locking
+ // d.metadataMu (recursively).
+ if now, ok := nowFromContext(ctx); ok {
+ atomic.StoreInt64(&d.mtime, now)
+ atomic.StoreInt64(&d.ctime, now)
+ }
+ }
+ if fd.vfsfd.StatusFlags()&linux.O_DIRECT != 0 {
+ // Write dirty cached pages that will be touched by the write back to
+ // the remote file.
+ if err := d.writeback(ctx, offset, src.NumBytes()); err != nil {
+ return 0, err
+ }
+ // Remove touched pages from the cache.
+ pgstart := pageRoundDown(uint64(offset))
+ pgend := pageRoundUp(uint64(offset + src.NumBytes()))
+ if pgend < pgstart {
+ return 0, syserror.EINVAL
+ }
+ mr := memmap.MappableRange{pgstart, pgend}
+ var freed []platform.FileRange
+ d.dataMu.Lock()
+ cseg := d.cache.LowerBoundSegment(mr.Start)
+ for cseg.Ok() && cseg.Start() < mr.End {
+ cseg = d.cache.Isolate(cseg, mr)
+ freed = append(freed, platform.FileRange{cseg.Value(), cseg.Value() + cseg.Range().Length()})
+ cseg = d.cache.Remove(cseg).NextSegment()
+ }
+ d.dataMu.Unlock()
+ // Invalidate mappings of removed pages.
+ d.mapsMu.Lock()
+ d.mappings.Invalidate(mr, memmap.InvalidateOpts{})
+ d.mapsMu.Unlock()
+ // Finally free pages removed from the cache.
+ mf := d.fs.mfp.MemoryFile()
+ for _, freedFR := range freed {
+ mf.DecRef(freedFR)
+ }
+ }
+ rw := getDentryReadWriter(ctx, d, offset)
+ if fd.vfsfd.StatusFlags()&linux.O_DIRECT != 0 {
+ // Require the write to go to the remote file.
+ rw.direct = true
+ }
+ n, err := src.CopyInTo(ctx, rw)
+ putDentryReadWriter(rw)
+ if n != 0 && fd.vfsfd.StatusFlags()&(linux.O_DSYNC|linux.O_SYNC) != 0 {
+ // Write dirty cached pages touched by the write back to the remote
+ // file.
+ if err := d.writeback(ctx, offset, src.NumBytes()); err != nil {
+ return 0, err
+ }
+ // Request the remote filesystem to sync the remote file.
+ if err := d.handle.file.fsync(ctx); err != nil {
+ return 0, err
+ }
+ }
+ return n, err
+}
+
+// Write implements vfs.FileDescriptionImpl.Write.
+func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
+ fd.mu.Lock()
+ n, err := fd.PWrite(ctx, src, fd.off, opts)
+ fd.off += n
+ fd.mu.Unlock()
+ return n, err
+}
+
+type dentryReadWriter struct {
+ ctx context.Context
+ d *dentry
+ off uint64
+ direct bool
+}
+
+var dentryReadWriterPool = sync.Pool{
+ New: func() interface{} {
+ return &dentryReadWriter{}
+ },
+}
+
+func getDentryReadWriter(ctx context.Context, d *dentry, offset int64) *dentryReadWriter {
+ rw := dentryReadWriterPool.Get().(*dentryReadWriter)
+ rw.ctx = ctx
+ rw.d = d
+ rw.off = uint64(offset)
+ rw.direct = false
+ return rw
+}
+
+func putDentryReadWriter(rw *dentryReadWriter) {
+ rw.ctx = nil
+ rw.d = nil
+ dentryReadWriterPool.Put(rw)
+}
+
+// ReadToBlocks implements safemem.Reader.ReadToBlocks.
+func (rw *dentryReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) {
+ if dsts.IsEmpty() {
+ return 0, nil
+ }
+
+ // If we have a mmappable host FD (which must be used here to ensure
+ // coherence with memory-mapped I/O), or if InteropModeShared is in effect
+ // (which prevents us from caching file contents and makes dentry.size
+ // unreliable), or if the file was opened O_DIRECT, read directly from
+ // dentry.handle without locking dentry.dataMu.
+ rw.d.handleMu.RLock()
+ if (rw.d.handle.fd >= 0 && !rw.d.fs.opts.forcePageCache) || rw.d.fs.opts.interop == InteropModeShared || rw.direct {
+ n, err := rw.d.handle.readToBlocksAt(rw.ctx, dsts, rw.off)
+ rw.d.handleMu.RUnlock()
+ rw.off += n
+ return n, err
+ }
+
+ // Otherwise read from/through the cache.
+ mf := rw.d.fs.mfp.MemoryFile()
+ fillCache := mf.ShouldCacheEvictable()
+ var dataMuUnlock func()
+ if fillCache {
+ rw.d.dataMu.Lock()
+ dataMuUnlock = rw.d.dataMu.Unlock
+ } else {
+ rw.d.dataMu.RLock()
+ dataMuUnlock = rw.d.dataMu.RUnlock
+ }
+
+ // Compute the range to read (limited by file size and overflow-checked).
+ if rw.off >= rw.d.size {
+ dataMuUnlock()
+ rw.d.handleMu.RUnlock()
+ return 0, io.EOF
+ }
+ end := rw.d.size
+ if rend := rw.off + dsts.NumBytes(); rend > rw.off && rend < end {
+ end = rend
+ }
+
+ var done uint64
+ seg, gap := rw.d.cache.Find(rw.off)
+ for rw.off < end {
+ mr := memmap.MappableRange{rw.off, end}
+ switch {
+ case seg.Ok():
+ // Get internal mappings from the cache.
+ ims, err := mf.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), usermem.Read)
+ if err != nil {
+ dataMuUnlock()
+ rw.d.handleMu.RUnlock()
+ return done, err
+ }
+
+ // Copy from internal mappings.
+ n, err := safemem.CopySeq(dsts, ims)
+ done += n
+ rw.off += n
+ dsts = dsts.DropFirst64(n)
+ if err != nil {
+ dataMuUnlock()
+ rw.d.handleMu.RUnlock()
+ return done, err
+ }
+
+ // Continue.
+ seg, gap = seg.NextNonEmpty()
+
+ case gap.Ok():
+ gapMR := gap.Range().Intersect(mr)
+ if fillCache {
+ // Read into the cache, then re-enter the loop to read from the
+ // cache.
+ reqMR := memmap.MappableRange{
+ Start: pageRoundDown(gapMR.Start),
+ End: pageRoundUp(gapMR.End),
+ }
+ optMR := gap.Range()
+ err := rw.d.cache.Fill(rw.ctx, reqMR, maxFillRange(reqMR, optMR), mf, usage.PageCache, rw.d.handle.readToBlocksAt)
+ mf.MarkEvictable(rw.d, pgalloc.EvictableRange{optMR.Start, optMR.End})
+ seg, gap = rw.d.cache.Find(rw.off)
+ if !seg.Ok() {
+ dataMuUnlock()
+ rw.d.handleMu.RUnlock()
+ return done, err
+ }
+ // err might have occurred in part of gap.Range() outside
+ // gapMR. Forget about it for now; if the error matters and
+ // persists, we'll run into it again in a later iteration of
+ // this loop.
+ } else {
+ // Read directly from the file.
+ gapDsts := dsts.TakeFirst64(gapMR.Length())
+ n, err := rw.d.handle.readToBlocksAt(rw.ctx, gapDsts, gapMR.Start)
+ done += n
+ rw.off += n
+ dsts = dsts.DropFirst64(n)
+ // Partial reads are fine. But we must stop reading.
+ if n != gapDsts.NumBytes() || err != nil {
+ dataMuUnlock()
+ rw.d.handleMu.RUnlock()
+ return done, err
+ }
+
+ // Continue.
+ seg, gap = gap.NextSegment(), fsutil.FileRangeGapIterator{}
+ }
+ }
+ }
+ dataMuUnlock()
+ rw.d.handleMu.RUnlock()
+ return done, nil
+}
+
+// WriteFromBlocks implements safemem.Writer.WriteFromBlocks.
+//
+// Preconditions: rw.d.metadataMu must be locked.
+func (rw *dentryReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) {
+ if srcs.IsEmpty() {
+ return 0, nil
+ }
+
+ // If we have a mmappable host FD (which must be used here to ensure
+ // coherence with memory-mapped I/O), or if InteropModeShared is in effect
+ // (which prevents us from caching file contents), or if the file was
+ // opened with O_DIRECT, write directly to dentry.handle without locking
+ // dentry.dataMu.
+ rw.d.handleMu.RLock()
+ if (rw.d.handle.fd >= 0 && !rw.d.fs.opts.forcePageCache) || rw.d.fs.opts.interop == InteropModeShared || rw.direct {
+ n, err := rw.d.handle.writeFromBlocksAt(rw.ctx, srcs, rw.off)
+ rw.d.handleMu.RUnlock()
+ rw.off += n
+ return n, err
+ }
+
+ // Otherwise write to/through the cache.
+ mf := rw.d.fs.mfp.MemoryFile()
+ rw.d.dataMu.Lock()
+
+ // Compute the range to write (overflow-checked).
+ start := rw.off
+ end := rw.off + srcs.NumBytes()
+ if end <= rw.off {
+ end = math.MaxInt64
+ }
+
+ var (
+ done uint64
+ retErr error
+ )
+ seg, gap := rw.d.cache.Find(rw.off)
+ for rw.off < end {
+ mr := memmap.MappableRange{rw.off, end}
+ switch {
+ case seg.Ok():
+ // Get internal mappings from the cache.
+ segMR := seg.Range().Intersect(mr)
+ ims, err := mf.MapInternal(seg.FileRangeOf(segMR), usermem.Write)
+ if err != nil {
+ retErr = err
+ goto exitLoop
+ }
+
+ // Copy to internal mappings.
+ n, err := safemem.CopySeq(ims, srcs)
+ done += n
+ rw.off += n
+ srcs = srcs.DropFirst64(n)
+ rw.d.dirty.MarkDirty(segMR)
+ if err != nil {
+ retErr = err
+ goto exitLoop
+ }
+
+ // Continue.
+ seg, gap = seg.NextNonEmpty()
+
+ case gap.Ok():
+ // Write directly to the file. At present, we never fill the cache
+ // when writing, since doing so can convert small writes into
+ // inefficient read-modify-write cycles, and we have no mechanism
+ // for detecting or avoiding this.
+ gapMR := gap.Range().Intersect(mr)
+ gapSrcs := srcs.TakeFirst64(gapMR.Length())
+ n, err := rw.d.handle.writeFromBlocksAt(rw.ctx, gapSrcs, gapMR.Start)
+ done += n
+ rw.off += n
+ srcs = srcs.DropFirst64(n)
+ // Partial writes are fine. But we must stop writing.
+ if n != gapSrcs.NumBytes() || err != nil {
+ retErr = err
+ goto exitLoop
+ }
+
+ // Continue.
+ seg, gap = gap.NextSegment(), fsutil.FileRangeGapIterator{}
+ }
+ }
+exitLoop:
+ if rw.off > rw.d.size {
+ atomic.StoreUint64(&rw.d.size, rw.off)
+ // The remote file's size will implicitly be extended to the correct
+ // value when we write back to it.
+ }
+ // If InteropModeWritethrough is in effect, flush written data back to the
+ // remote filesystem.
+ if rw.d.fs.opts.interop == InteropModeWritethrough && done != 0 {
+ if err := fsutil.SyncDirty(rw.ctx, memmap.MappableRange{
+ Start: start,
+ End: rw.off,
+ }, &rw.d.cache, &rw.d.dirty, rw.d.size, mf, rw.d.handle.writeFromBlocksAt); err != nil {
+ // We have no idea how many bytes were actually flushed.
+ rw.off = start
+ done = 0
+ retErr = err
+ }
+ }
+ rw.d.dataMu.Unlock()
+ rw.d.handleMu.RUnlock()
+ return done, retErr
+}
+
+func (d *dentry) writeback(ctx context.Context, offset, size int64) error {
+ if size == 0 {
+ return nil
+ }
+ d.handleMu.RLock()
+ defer d.handleMu.RUnlock()
+ d.dataMu.Lock()
+ defer d.dataMu.Unlock()
+ // Compute the range of valid bytes (overflow-checked).
+ if uint64(offset) >= d.size {
+ return nil
+ }
+ end := int64(d.size)
+ if rend := offset + size; rend > offset && rend < end {
+ end = rend
+ }
+ return fsutil.SyncDirty(ctx, memmap.MappableRange{
+ Start: uint64(offset),
+ End: uint64(end),
+ }, &d.cache, &d.dirty, d.size, d.fs.mfp.MemoryFile(), d.handle.writeFromBlocksAt)
+}
+
+// Seek implements vfs.FileDescriptionImpl.Seek.
+func (fd *regularFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
+ fd.mu.Lock()
+ defer fd.mu.Unlock()
+ switch whence {
+ case linux.SEEK_SET:
+ // Use offset as specified.
+ case linux.SEEK_CUR:
+ offset += fd.off
+ case linux.SEEK_END, linux.SEEK_DATA, linux.SEEK_HOLE:
+ // Ensure file size is up to date.
+ d := fd.dentry()
+ if fd.filesystem().opts.interop == InteropModeShared {
+ if err := d.updateFromGetattr(ctx); err != nil {
+ return 0, err
+ }
+ }
+ size := int64(atomic.LoadUint64(&d.size))
+ // For SEEK_DATA and SEEK_HOLE, treat the file as a single contiguous
+ // block of data.
+ switch whence {
+ case linux.SEEK_END:
+ offset += size
+ case linux.SEEK_DATA:
+ if offset > size {
+ return 0, syserror.ENXIO
+ }
+ // Use offset as specified.
+ case linux.SEEK_HOLE:
+ if offset > size {
+ return 0, syserror.ENXIO
+ }
+ offset = size
+ }
+ default:
+ return 0, syserror.EINVAL
+ }
+ if offset < 0 {
+ return 0, syserror.EINVAL
+ }
+ fd.off = offset
+ return offset, nil
+}
+
+// Sync implements vfs.FileDescriptionImpl.Sync.
+func (fd *regularFileFD) Sync(ctx context.Context) error {
+ return fd.dentry().syncSharedHandle(ctx)
+}
+
+func (d *dentry) syncSharedHandle(ctx context.Context) error {
+ d.handleMu.RLock()
+ if !d.handleWritable {
+ d.handleMu.RUnlock()
+ return nil
+ }
+ d.dataMu.Lock()
+ // Write dirty cached data to the remote file.
+ err := fsutil.SyncDirtyAll(ctx, &d.cache, &d.dirty, d.size, d.fs.mfp.MemoryFile(), d.handle.writeFromBlocksAt)
+ d.dataMu.Unlock()
+ if err == nil {
+ // Sync the remote file.
+ err = d.handle.sync(ctx)
+ }
+ d.handleMu.RUnlock()
+ return err
+}
+
+// ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap.
+func (fd *regularFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error {
+ d := fd.dentry()
+ switch d.fs.opts.interop {
+ case InteropModeExclusive:
+ // Any mapping is fine.
+ case InteropModeWritethrough:
+ // Shared writable mappings require a host FD, since otherwise we can't
+ // synchronously flush memory-mapped writes to the remote file.
+ if opts.Private || !opts.MaxPerms.Write {
+ break
+ }
+ fallthrough
+ case InteropModeShared:
+ // All mappings require a host FD to be coherent with other filesystem
+ // users.
+ if d.fs.opts.forcePageCache {
+ // Whether or not we have a host FD, we're not allowed to use it.
+ return syserror.ENODEV
+ }
+ d.handleMu.RLock()
+ haveFD := d.handle.fd >= 0
+ d.handleMu.RUnlock()
+ if !haveFD {
+ return syserror.ENODEV
+ }
+ default:
+ panic(fmt.Sprintf("unknown InteropMode %v", d.fs.opts.interop))
+ }
+ return vfs.GenericConfigureMMap(&fd.vfsfd, d, opts)
+}
+
+func (d *dentry) mayCachePages() bool {
+ if d.fs.opts.interop == InteropModeShared {
+ return false
+ }
+ if d.fs.opts.forcePageCache {
+ return true
+ }
+ d.handleMu.RLock()
+ haveFD := d.handle.fd >= 0
+ d.handleMu.RUnlock()
+ return haveFD
+}
+
+// AddMapping implements memmap.Mappable.AddMapping.
+func (d *dentry) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) error {
+ d.mapsMu.Lock()
+ mapped := d.mappings.AddMapping(ms, ar, offset, writable)
+ // Do this unconditionally since whether we have a host FD can change
+ // across save/restore.
+ for _, r := range mapped {
+ d.pf.hostFileMapper.IncRefOn(r)
+ }
+ if d.mayCachePages() {
+ // d.Evict() will refuse to evict memory-mapped pages, so tell the
+ // MemoryFile to not bother trying.
+ mf := d.fs.mfp.MemoryFile()
+ for _, r := range mapped {
+ mf.MarkUnevictable(d, pgalloc.EvictableRange{r.Start, r.End})
+ }
+ }
+ d.mapsMu.Unlock()
+ return nil
+}
+
+// RemoveMapping implements memmap.Mappable.RemoveMapping.
+func (d *dentry) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) {
+ d.mapsMu.Lock()
+ unmapped := d.mappings.RemoveMapping(ms, ar, offset, writable)
+ for _, r := range unmapped {
+ d.pf.hostFileMapper.DecRefOn(r)
+ }
+ if d.mayCachePages() {
+ // Pages that are no longer referenced by any application memory
+ // mappings are now considered unused; allow MemoryFile to evict them
+ // when necessary.
+ mf := d.fs.mfp.MemoryFile()
+ d.dataMu.Lock()
+ for _, r := range unmapped {
+ // Since these pages are no longer mapped, they are no longer
+ // concurrently dirtyable by a writable memory mapping.
+ d.dirty.AllowClean(r)
+ mf.MarkEvictable(d, pgalloc.EvictableRange{r.Start, r.End})
+ }
+ d.dataMu.Unlock()
+ }
+ d.mapsMu.Unlock()
+}
+
+// CopyMapping implements memmap.Mappable.CopyMapping.
+func (d *dentry) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR usermem.AddrRange, offset uint64, writable bool) error {
+ return d.AddMapping(ctx, ms, dstAR, offset, writable)
+}
+
+// Translate implements memmap.Mappable.Translate.
+func (d *dentry) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) {
+ d.handleMu.RLock()
+ if d.handle.fd >= 0 && !d.fs.opts.forcePageCache {
+ d.handleMu.RUnlock()
+ mr := optional
+ if d.fs.opts.limitHostFDTranslation {
+ mr = maxFillRange(required, optional)
+ }
+ return []memmap.Translation{
+ {
+ Source: mr,
+ File: &d.pf,
+ Offset: mr.Start,
+ Perms: usermem.AnyAccess,
+ },
+ }, nil
+ }
+
+ d.dataMu.Lock()
+
+ // Constrain translations to d.size (rounded up) to prevent translation to
+ // pages that may be concurrently truncated.
+ pgend := pageRoundUp(d.size)
+ var beyondEOF bool
+ if required.End > pgend {
+ if required.Start >= pgend {
+ d.dataMu.Unlock()
+ d.handleMu.RUnlock()
+ return nil, &memmap.BusError{io.EOF}
+ }
+ beyondEOF = true
+ required.End = pgend
+ }
+ if optional.End > pgend {
+ optional.End = pgend
+ }
+
+ mf := d.fs.mfp.MemoryFile()
+ cerr := d.cache.Fill(ctx, required, maxFillRange(required, optional), mf, usage.PageCache, d.handle.readToBlocksAt)
+
+ var ts []memmap.Translation
+ var translatedEnd uint64
+ for seg := d.cache.FindSegment(required.Start); seg.Ok() && seg.Start() < required.End; seg, _ = seg.NextNonEmpty() {
+ segMR := seg.Range().Intersect(optional)
+ // TODO(jamieliu): Make Translations writable even if writability is
+ // not required if already kept-dirty by another writable translation.
+ perms := usermem.AccessType{
+ Read: true,
+ Execute: true,
+ }
+ if at.Write {
+ // From this point forward, this memory can be dirtied through the
+ // mapping at any time.
+ d.dirty.KeepDirty(segMR)
+ perms.Write = true
+ }
+ ts = append(ts, memmap.Translation{
+ Source: segMR,
+ File: mf,
+ Offset: seg.FileRangeOf(segMR).Start,
+ Perms: perms,
+ })
+ translatedEnd = segMR.End
+ }
+
+ d.dataMu.Unlock()
+ d.handleMu.RUnlock()
+
+ // Don't return the error returned by c.cache.Fill if it occurred outside
+ // of required.
+ if translatedEnd < required.End && cerr != nil {
+ return ts, &memmap.BusError{cerr}
+ }
+ if beyondEOF {
+ return ts, &memmap.BusError{io.EOF}
+ }
+ return ts, nil
+}
+
+func maxFillRange(required, optional memmap.MappableRange) memmap.MappableRange {
+ const maxReadahead = 64 << 10 // 64 KB, chosen arbitrarily
+ if required.Length() >= maxReadahead {
+ return required
+ }
+ if optional.Length() <= maxReadahead {
+ return optional
+ }
+ optional.Start = required.Start
+ if optional.Length() <= maxReadahead {
+ return optional
+ }
+ optional.End = optional.Start + maxReadahead
+ return optional
+}
+
+// InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable.
+func (d *dentry) InvalidateUnsavable(ctx context.Context) error {
+ // Whether we have a host fd (and consequently what platform.File is
+ // mapped) can change across save/restore, so invalidate all translations
+ // unconditionally.
+ d.mapsMu.Lock()
+ defer d.mapsMu.Unlock()
+ d.mappings.InvalidateAll(memmap.InvalidateOpts{})
+
+ // Write the cache's contents back to the remote file so that if we have a
+ // host fd after restore, the remote file's contents are coherent.
+ mf := d.fs.mfp.MemoryFile()
+ d.dataMu.Lock()
+ defer d.dataMu.Unlock()
+ if err := fsutil.SyncDirtyAll(ctx, &d.cache, &d.dirty, d.size, mf, d.handle.writeFromBlocksAt); err != nil {
+ return err
+ }
+
+ // Discard the cache so that it's not stored in saved state. This is safe
+ // because per InvalidateUnsavable invariants, no new translations can have
+ // been returned after we invalidated all existing translations above.
+ d.cache.DropAll(mf)
+ d.dirty.RemoveAll()
+
+ return nil
+}
+
+// Evict implements pgalloc.EvictableMemoryUser.Evict.
+func (d *dentry) Evict(ctx context.Context, er pgalloc.EvictableRange) {
+ d.mapsMu.Lock()
+ defer d.mapsMu.Unlock()
+ d.dataMu.Lock()
+ defer d.dataMu.Unlock()
+
+ mr := memmap.MappableRange{er.Start, er.End}
+ mf := d.fs.mfp.MemoryFile()
+ // Only allow pages that are no longer memory-mapped to be evicted.
+ for mgap := d.mappings.LowerBoundGap(mr.Start); mgap.Ok() && mgap.Start() < mr.End; mgap = mgap.NextGap() {
+ mgapMR := mgap.Range().Intersect(mr)
+ if mgapMR.Length() == 0 {
+ continue
+ }
+ if err := fsutil.SyncDirty(ctx, mgapMR, &d.cache, &d.dirty, d.size, mf, d.handle.writeFromBlocksAt); err != nil {
+ log.Warningf("Failed to writeback cached data %v: %v", mgapMR, err)
+ }
+ d.cache.Drop(mgapMR, mf)
+ d.dirty.KeepClean(mgapMR)
+ }
+}
+
+// dentryPlatformFile implements platform.File. It exists solely because dentry
+// cannot implement both vfs.DentryImpl.IncRef and platform.File.IncRef.
+//
+// dentryPlatformFile is only used when a host FD representing the remote file
+// is available (i.e. dentry.handle.fd >= 0), and that FD is used for
+// application memory mappings (i.e. !filesystem.opts.forcePageCache).
+type dentryPlatformFile struct {
+ *dentry
+
+ // fdRefs counts references on platform.File offsets. fdRefs is protected
+ // by dentry.dataMu.
+ fdRefs fsutil.FrameRefSet
+
+ // If this dentry represents a regular file, and handle.fd >= 0,
+ // hostFileMapper caches mappings of handle.fd.
+ hostFileMapper fsutil.HostFileMapper
+}
+
+// IncRef implements platform.File.IncRef.
+func (d *dentryPlatformFile) IncRef(fr platform.FileRange) {
+ d.dataMu.Lock()
+ seg, gap := d.fdRefs.Find(fr.Start)
+ for {
+ switch {
+ case seg.Ok() && seg.Start() < fr.End:
+ seg = d.fdRefs.Isolate(seg, fr)
+ seg.SetValue(seg.Value() + 1)
+ seg, gap = seg.NextNonEmpty()
+ case gap.Ok() && gap.Start() < fr.End:
+ newRange := gap.Range().Intersect(fr)
+ usage.MemoryAccounting.Inc(newRange.Length(), usage.Mapped)
+ seg, gap = d.fdRefs.InsertWithoutMerging(gap, newRange, 1).NextNonEmpty()
+ default:
+ d.fdRefs.MergeAdjacent(fr)
+ d.dataMu.Unlock()
+ return
+ }
+ }
+}
+
+// DecRef implements platform.File.DecRef.
+func (d *dentryPlatformFile) DecRef(fr platform.FileRange) {
+ d.dataMu.Lock()
+ seg := d.fdRefs.FindSegment(fr.Start)
+
+ for seg.Ok() && seg.Start() < fr.End {
+ seg = d.fdRefs.Isolate(seg, fr)
+ if old := seg.Value(); old == 1 {
+ usage.MemoryAccounting.Dec(seg.Range().Length(), usage.Mapped)
+ seg = d.fdRefs.Remove(seg).NextSegment()
+ } else {
+ seg.SetValue(old - 1)
+ seg = seg.NextSegment()
+ }
+ }
+ d.fdRefs.MergeAdjacent(fr)
+ d.dataMu.Unlock()
+
+}
+
+// MapInternal implements platform.File.MapInternal.
+func (d *dentryPlatformFile) MapInternal(fr platform.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) {
+ d.handleMu.RLock()
+ bs, err := d.hostFileMapper.MapInternal(fr, int(d.handle.fd), at.Write)
+ d.handleMu.RUnlock()
+ return bs, err
+}
+
+// FD implements platform.File.FD.
+func (d *dentryPlatformFile) FD() int {
+ d.handleMu.RLock()
+ fd := d.handle.fd
+ d.handleMu.RUnlock()
+ return int(fd)
+}
diff --git a/pkg/sentry/fsimpl/gofer/special_file.go b/pkg/sentry/fsimpl/gofer/special_file.go
new file mode 100644
index 000000000..08c691c47
--- /dev/null
+++ b/pkg/sentry/fsimpl/gofer/special_file.go
@@ -0,0 +1,159 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package gofer
+
+import (
+ "sync"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/safemem"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/usermem"
+)
+
+// specialFileFD implements vfs.FileDescriptionImpl for files other than
+// regular files, directories, and symlinks: pipes, sockets, etc. It is also
+// used for regular files when filesystemOptions.specialRegularFiles is in
+// effect. specialFileFD differs from regularFileFD by using per-FD handles
+// instead of shared per-dentry handles, and never buffering I/O.
+type specialFileFD struct {
+ fileDescription
+
+ // handle is immutable.
+ handle handle
+
+ // off is the file offset. off is protected by mu. (POSIX 2.9.7 only
+ // requires operations using the file offset to be atomic for regular files
+ // and symlinks; however, since specialFileFD may be used for regular
+ // files, we apply this atomicity unconditionally.)
+ mu sync.Mutex
+ off int64
+}
+
+// Release implements vfs.FileDescriptionImpl.Release.
+func (fd *specialFileFD) Release() {
+ fd.handle.close(context.Background())
+ fs := fd.vfsfd.Mount().Filesystem().Impl().(*filesystem)
+ fs.syncMu.Lock()
+ delete(fs.specialFileFDs, fd)
+ fs.syncMu.Unlock()
+}
+
+// OnClose implements vfs.FileDescriptionImpl.OnClose.
+func (fd *specialFileFD) OnClose(ctx context.Context) error {
+ if !fd.vfsfd.IsWritable() {
+ return nil
+ }
+ return fd.handle.file.flush(ctx)
+}
+
+// PRead implements vfs.FileDescriptionImpl.PRead.
+func (fd *specialFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
+ if offset < 0 {
+ return 0, syserror.EINVAL
+ }
+ if opts.Flags != 0 {
+ return 0, syserror.EOPNOTSUPP
+ }
+
+ // Going through dst.CopyOutFrom() holds MM locks around file operations of
+ // unknown duration. For regularFileFD, doing so is necessary to support
+ // mmap due to lock ordering; MM locks precede dentry.dataMu. That doesn't
+ // hold here since specialFileFD doesn't client-cache data. Just buffer the
+ // read instead.
+ if d := fd.dentry(); d.fs.opts.interop != InteropModeShared {
+ d.touchAtime(ctx, fd.vfsfd.Mount())
+ }
+ buf := make([]byte, dst.NumBytes())
+ n, err := fd.handle.readToBlocksAt(ctx, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf)), uint64(offset))
+ if n == 0 {
+ return 0, err
+ }
+ if cp, cperr := dst.CopyOut(ctx, buf[:n]); cperr != nil {
+ return int64(cp), cperr
+ }
+ return int64(n), err
+}
+
+// Read implements vfs.FileDescriptionImpl.Read.
+func (fd *specialFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
+ fd.mu.Lock()
+ n, err := fd.PRead(ctx, dst, fd.off, opts)
+ fd.off += n
+ fd.mu.Unlock()
+ return n, err
+}
+
+// PWrite implements vfs.FileDescriptionImpl.PWrite.
+func (fd *specialFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
+ if offset < 0 {
+ return 0, syserror.EINVAL
+ }
+ if opts.Flags != 0 {
+ return 0, syserror.EOPNOTSUPP
+ }
+
+ // Do a buffered write. See rationale in PRead.
+ if d := fd.dentry(); d.fs.opts.interop != InteropModeShared {
+ d.touchCMtime(ctx)
+ }
+ buf := make([]byte, src.NumBytes())
+ // Don't do partial writes if we get a partial read from src.
+ if _, err := src.CopyIn(ctx, buf); err != nil {
+ return 0, err
+ }
+ n, err := fd.handle.writeFromBlocksAt(ctx, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf)), uint64(offset))
+ return int64(n), err
+}
+
+// Write implements vfs.FileDescriptionImpl.Write.
+func (fd *specialFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
+ fd.mu.Lock()
+ n, err := fd.PWrite(ctx, src, fd.off, opts)
+ fd.off += n
+ fd.mu.Unlock()
+ return n, err
+}
+
+// Seek implements vfs.FileDescriptionImpl.Seek.
+func (fd *specialFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
+ fd.mu.Lock()
+ defer fd.mu.Unlock()
+ switch whence {
+ case linux.SEEK_SET:
+ // Use offset as given.
+ case linux.SEEK_CUR:
+ offset += fd.off
+ default:
+ // SEEK_END, SEEK_DATA, and SEEK_HOLE aren't supported since it's not
+ // clear that file size is even meaningful for these files.
+ return 0, syserror.EINVAL
+ }
+ if offset < 0 {
+ return 0, syserror.EINVAL
+ }
+ fd.off = offset
+ return offset, nil
+}
+
+// Sync implements vfs.FileDescriptionImpl.Sync.
+func (fd *specialFileFD) Sync(ctx context.Context) error {
+ if !fd.vfsfd.IsWritable() {
+ return nil
+ }
+ return fd.handle.sync(ctx)
+}
diff --git a/pkg/sentry/fsimpl/gofer/symlink.go b/pkg/sentry/fsimpl/gofer/symlink.go
new file mode 100644
index 000000000..adf43be60
--- /dev/null
+++ b/pkg/sentry/fsimpl/gofer/symlink.go
@@ -0,0 +1,47 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package gofer
+
+import (
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+)
+
+func (d *dentry) isSymlink() bool {
+ return d.fileType() == linux.S_IFLNK
+}
+
+// Precondition: d.isSymlink().
+func (d *dentry) readlink(ctx context.Context, mnt *vfs.Mount) (string, error) {
+ if d.fs.opts.interop != InteropModeShared {
+ d.touchAtime(ctx, mnt)
+ d.dataMu.Lock()
+ if d.haveTarget {
+ target := d.target
+ d.dataMu.Unlock()
+ return target, nil
+ }
+ }
+ target, err := d.file.readlink(ctx)
+ if d.fs.opts.interop != InteropModeShared {
+ if err == nil {
+ d.haveTarget = true
+ d.target = target
+ }
+ d.dataMu.Unlock()
+ }
+ return target, err
+}
diff --git a/pkg/sentry/fsimpl/gofer/time.go b/pkg/sentry/fsimpl/gofer/time.go
new file mode 100644
index 000000000..7598ec6a8
--- /dev/null
+++ b/pkg/sentry/fsimpl/gofer/time.go
@@ -0,0 +1,75 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package gofer
+
+import (
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+)
+
+func dentryTimestampFromP9(s, ns uint64) int64 {
+ return int64(s*1e9 + ns)
+}
+
+func dentryTimestampFromStatx(ts linux.StatxTimestamp) int64 {
+ return ts.Sec*1e9 + int64(ts.Nsec)
+}
+
+func statxTimestampFromDentry(ns int64) linux.StatxTimestamp {
+ return linux.StatxTimestamp{
+ Sec: ns / 1e9,
+ Nsec: uint32(ns % 1e9),
+ }
+}
+
+func nowFromContext(ctx context.Context) (int64, bool) {
+ if clock := ktime.RealtimeClockFromContext(ctx); clock != nil {
+ return clock.Now().Nanoseconds(), true
+ }
+ return 0, false
+}
+
+// Preconditions: fs.interop != InteropModeShared.
+func (d *dentry) touchAtime(ctx context.Context, mnt *vfs.Mount) {
+ if err := mnt.CheckBeginWrite(); err != nil {
+ return
+ }
+ now, ok := nowFromContext(ctx)
+ if !ok {
+ mnt.EndWrite()
+ return
+ }
+ d.metadataMu.Lock()
+ atomic.StoreInt64(&d.atime, now)
+ d.metadataMu.Unlock()
+ mnt.EndWrite()
+}
+
+// Preconditions: fs.interop != InteropModeShared. The caller has successfully
+// called vfs.Mount.CheckBeginWrite().
+func (d *dentry) touchCMtime(ctx context.Context) {
+ now, ok := nowFromContext(ctx)
+ if !ok {
+ return
+ }
+ d.metadataMu.Lock()
+ atomic.StoreInt64(&d.mtime, now)
+ atomic.StoreInt64(&d.ctime, now)
+ d.metadataMu.Unlock()
+}
diff --git a/pkg/sentry/fsimpl/testutil/testutil.go b/pkg/sentry/fsimpl/testutil/testutil.go
index 1c98335c1..69fd84ddd 100644
--- a/pkg/sentry/fsimpl/testutil/testutil.go
+++ b/pkg/sentry/fsimpl/testutil/testutil.go
@@ -98,7 +98,7 @@ func (s *System) WithTemporaryContext(ctx context.Context) *System {
// Destroy release resources associated with a test system.
func (s *System) Destroy() {
s.Root.DecRef()
- s.mns.DecRef(s.VFS) // Reference on mns passed to NewSystem.
+ s.mns.DecRef() // Reference on mns passed to NewSystem.
}
// ReadToEnd reads the contents of fd until EOF to a string.
diff --git a/pkg/sentry/fsimpl/tmpfs/BUILD b/pkg/sentry/fsimpl/tmpfs/BUILD
index c61366224..57abd5583 100644
--- a/pkg/sentry/fsimpl/tmpfs/BUILD
+++ b/pkg/sentry/fsimpl/tmpfs/BUILD
@@ -38,6 +38,7 @@ go_library(
"//pkg/sentry/arch",
"//pkg/sentry/fs",
"//pkg/sentry/fs/fsutil",
+ "//pkg/sentry/fs/lock",
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/kernel/pipe",
@@ -47,6 +48,7 @@ go_library(
"//pkg/sentry/platform",
"//pkg/sentry/usage",
"//pkg/sentry/vfs",
+ "//pkg/sentry/vfs/lock",
"//pkg/sync",
"//pkg/syserror",
"//pkg/usermem",
@@ -86,6 +88,7 @@ go_test(
"//pkg/context",
"//pkg/fspath",
"//pkg/sentry/contexttest",
+ "//pkg/sentry/fs/lock",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/kernel/contexttest",
"//pkg/sentry/vfs",
diff --git a/pkg/sentry/fsimpl/tmpfs/benchmark_test.go b/pkg/sentry/fsimpl/tmpfs/benchmark_test.go
index 54241c8e8..9fce5e4b4 100644
--- a/pkg/sentry/fsimpl/tmpfs/benchmark_test.go
+++ b/pkg/sentry/fsimpl/tmpfs/benchmark_test.go
@@ -183,7 +183,7 @@ func BenchmarkVFS2MemfsStat(b *testing.B) {
if err != nil {
b.Fatalf("failed to create tmpfs root mount: %v", err)
}
- defer mntns.DecRef(vfsObj)
+ defer mntns.DecRef()
var filePathBuilder strings.Builder
filePathBuilder.WriteByte('/')
@@ -374,7 +374,7 @@ func BenchmarkVFS2MemfsMountStat(b *testing.B) {
if err != nil {
b.Fatalf("failed to create tmpfs root mount: %v", err)
}
- defer mntns.DecRef(vfsObj)
+ defer mntns.DecRef()
var filePathBuilder strings.Builder
filePathBuilder.WriteByte('/')
diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go
index 5ee9cf1e9..72bc15264 100644
--- a/pkg/sentry/fsimpl/tmpfs/filesystem.go
+++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go
@@ -622,7 +622,7 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error
if child.inode.isDir() {
return syserror.EISDIR
}
- if !rp.MustBeDir() {
+ if rp.MustBeDir() {
return syserror.ENOTDIR
}
mnt := rp.Mount()
diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file.go b/pkg/sentry/fsimpl/tmpfs/regular_file.go
index e9e6faf67..dab346a41 100644
--- a/pkg/sentry/fsimpl/tmpfs/regular_file.go
+++ b/pkg/sentry/fsimpl/tmpfs/regular_file.go
@@ -23,6 +23,7 @@ import (
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/pgalloc"
@@ -192,6 +193,28 @@ func (fd *regularFileFD) Sync(ctx context.Context) error {
return nil
}
+// LockBSD implements vfs.FileDescriptionImpl.LockBSD.
+func (fd *regularFileFD) LockBSD(ctx context.Context, uid lock.UniqueID, t lock.LockType, block lock.Blocker) error {
+ return fd.inode().lockBSD(uid, t, block)
+}
+
+// UnlockBSD implements vfs.FileDescriptionImpl.UnlockBSD.
+func (fd *regularFileFD) UnlockBSD(ctx context.Context, uid lock.UniqueID) error {
+ fd.inode().unlockBSD(uid)
+ return nil
+}
+
+// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
+func (fd *regularFileFD) LockPOSIX(ctx context.Context, uid lock.UniqueID, t lock.LockType, rng lock.LockRange, block lock.Blocker) error {
+ return fd.inode().lockPOSIX(uid, t, rng, block)
+}
+
+// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
+func (fd *regularFileFD) UnlockPOSIX(ctx context.Context, uid lock.UniqueID, rng lock.LockRange) error {
+ fd.inode().unlockPOSIX(uid, rng)
+ return nil
+}
+
// regularFileReadWriter implements safemem.Reader and Safemem.Writer.
type regularFileReadWriter struct {
file *regularFile
diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file_test.go b/pkg/sentry/fsimpl/tmpfs/regular_file_test.go
index 32552e261..e9f71e334 100644
--- a/pkg/sentry/fsimpl/tmpfs/regular_file_test.go
+++ b/pkg/sentry/fsimpl/tmpfs/regular_file_test.go
@@ -24,9 +24,11 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fspath"
+ "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/kernel/contexttest"
"gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -49,7 +51,7 @@ func newTmpfsRoot(ctx context.Context) (*vfs.VirtualFilesystem, vfs.VirtualDentr
root := mntns.Root()
return vfsObj, root, func() {
root.DecRef()
- mntns.DecRef(vfsObj)
+ mntns.DecRef()
}, nil
}
@@ -260,6 +262,60 @@ func TestPWrite(t *testing.T) {
}
}
+func TestLocks(t *testing.T) {
+ ctx := contexttest.Context(t)
+ fd, cleanup, err := newFileFD(ctx, 0644)
+ if err != nil {
+ t.Fatal(err)
+ }
+ defer cleanup()
+
+ var (
+ uid1 lock.UniqueID
+ uid2 lock.UniqueID
+ // Non-blocking.
+ block lock.Blocker
+ )
+
+ uid1 = 123
+ uid2 = 456
+
+ if err := fd.Impl().LockBSD(ctx, uid1, lock.ReadLock, block); err != nil {
+ t.Fatalf("fd.Impl().LockBSD failed: err = %v", err)
+ }
+ if err := fd.Impl().LockBSD(ctx, uid2, lock.ReadLock, block); err != nil {
+ t.Fatalf("fd.Impl().LockBSD failed: err = %v", err)
+ }
+ if got, want := fd.Impl().LockBSD(ctx, uid2, lock.WriteLock, block), syserror.ErrWouldBlock; got != want {
+ t.Fatalf("fd.Impl().LockBSD failed: got = %v, want = %v", got, want)
+ }
+ if err := fd.Impl().UnlockBSD(ctx, uid1); err != nil {
+ t.Fatalf("fd.Impl().UnlockBSD failed: err = %v", err)
+ }
+ if err := fd.Impl().LockBSD(ctx, uid2, lock.WriteLock, block); err != nil {
+ t.Fatalf("fd.Impl().LockBSD failed: err = %v", err)
+ }
+
+ rng1 := lock.LockRange{0, 1}
+ rng2 := lock.LockRange{1, 2}
+
+ if err := fd.Impl().LockPOSIX(ctx, uid1, lock.ReadLock, rng1, block); err != nil {
+ t.Fatalf("fd.Impl().LockPOSIX failed: err = %v", err)
+ }
+ if err := fd.Impl().LockPOSIX(ctx, uid2, lock.ReadLock, rng2, block); err != nil {
+ t.Fatalf("fd.Impl().LockPOSIX failed: err = %v", err)
+ }
+ if err := fd.Impl().LockPOSIX(ctx, uid1, lock.WriteLock, rng1, block); err != nil {
+ t.Fatalf("fd.Impl().LockPOSIX failed: err = %v", err)
+ }
+ if got, want := fd.Impl().LockPOSIX(ctx, uid2, lock.ReadLock, rng1, block), syserror.ErrWouldBlock; got != want {
+ t.Fatalf("fd.Impl().LockPOSIX failed: got = %v, want = %v", got, want)
+ }
+ if err := fd.Impl().UnlockPOSIX(ctx, uid1, rng1); err != nil {
+ t.Fatalf("fd.Impl().UnlockPOSIX failed: err = %v", err)
+ }
+}
+
func TestPRead(t *testing.T) {
ctx := contexttest.Context(t)
fd, cleanup, err := newFileFD(ctx, 0644)
diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
index 88dbd6e35..2108d0f4d 100644
--- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go
+++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
@@ -30,10 +30,12 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/sentry/pgalloc"
"gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/sentry/vfs/lock"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserror"
)
@@ -153,6 +155,9 @@ type inode struct {
rdevMajor uint32
rdevMinor uint32
+ // Advisory file locks, which lock at the inode level.
+ locks lock.FileLocks
+
impl interface{} // immutable
}
@@ -352,6 +357,44 @@ func (i *inode) setStat(stat linux.Statx) error {
return nil
}
+// TODO(gvisor.dev/issue/1480): support file locking for file types other than regular.
+func (i *inode) lockBSD(uid fslock.UniqueID, t fslock.LockType, block fslock.Blocker) error {
+ switch i.impl.(type) {
+ case *regularFile:
+ return i.locks.LockBSD(uid, t, block)
+ }
+ return syserror.EBADF
+}
+
+// TODO(gvisor.dev/issue/1480): support file locking for file types other than regular.
+func (i *inode) unlockBSD(uid fslock.UniqueID) error {
+ switch i.impl.(type) {
+ case *regularFile:
+ i.locks.UnlockBSD(uid)
+ return nil
+ }
+ return syserror.EBADF
+}
+
+// TODO(gvisor.dev/issue/1480): support file locking for file types other than regular.
+func (i *inode) lockPOSIX(uid fslock.UniqueID, t fslock.LockType, rng fslock.LockRange, block fslock.Blocker) error {
+ switch i.impl.(type) {
+ case *regularFile:
+ return i.locks.LockPOSIX(uid, t, rng, block)
+ }
+ return syserror.EBADF
+}
+
+// TODO(gvisor.dev/issue/1480): support file locking for file types other than regular.
+func (i *inode) unlockPOSIX(uid fslock.UniqueID, rng fslock.LockRange) error {
+ switch i.impl.(type) {
+ case *regularFile:
+ i.locks.UnlockPOSIX(uid, rng)
+ return nil
+ }
+ return syserror.EBADF
+}
+
// allocatedBlocksForSize returns the number of 512B blocks needed to
// accommodate the given size in bytes, as appropriate for struct
// stat::st_blocks and struct statx::stx_blocks. (Note that this 512B block
diff --git a/pkg/sentry/inet/inet.go b/pkg/sentry/inet/inet.go
index a7dfb78a7..2916a0644 100644
--- a/pkg/sentry/inet/inet.go
+++ b/pkg/sentry/inet/inet.go
@@ -28,6 +28,10 @@ type Stack interface {
// interface indexes to a slice of associated interface address properties.
InterfaceAddrs() map[int32][]InterfaceAddr
+ // AddInterfaceAddr adds an address to the network interface identified by
+ // index.
+ AddInterfaceAddr(idx int32, addr InterfaceAddr) error
+
// SupportsIPv6 returns true if the stack supports IPv6 connectivity.
SupportsIPv6() bool
diff --git a/pkg/sentry/inet/test_stack.go b/pkg/sentry/inet/test_stack.go
index dcfcbd97e..d8961fc94 100644
--- a/pkg/sentry/inet/test_stack.go
+++ b/pkg/sentry/inet/test_stack.go
@@ -47,6 +47,12 @@ func (s *TestStack) InterfaceAddrs() map[int32][]InterfaceAddr {
return s.InterfaceAddrsMap
}
+// AddInterfaceAddr implements Stack.AddInterfaceAddr.
+func (s *TestStack) AddInterfaceAddr(idx int32, addr InterfaceAddr) error {
+ s.InterfaceAddrsMap[idx] = append(s.InterfaceAddrsMap[idx], addr)
+ return nil
+}
+
// SupportsIPv6 implements Stack.SupportsIPv6.
func (s *TestStack) SupportsIPv6() bool {
return s.SupportsIPv6Flag
diff --git a/pkg/sentry/mm/README.md b/pkg/sentry/mm/README.md
index e1322e373..f4d43d927 100644
--- a/pkg/sentry/mm/README.md
+++ b/pkg/sentry/mm/README.md
@@ -274,7 +274,7 @@ In the sentry:
methods
[`platform.AddressSpace.MapFile` and `platform.AddressSpace.Unmap`][platform].
-[memmap]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/memmap/memmap.go
-[mm]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/mm/mm.go
-[pgalloc]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/pgalloc/pgalloc.go
-[platform]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/platform/platform.go
+[memmap]: https://github.com/google/gvisor/blob/master/pkg/sentry/memmap/memmap.go
+[mm]: https://github.com/google/gvisor/blob/master/pkg/sentry/mm/mm.go
+[pgalloc]: https://github.com/google/gvisor/blob/master/pkg/sentry/pgalloc/pgalloc.go
+[platform]: https://github.com/google/gvisor/blob/master/pkg/sentry/platform/platform.go
diff --git a/pkg/sentry/socket/hostinet/socket.go b/pkg/sentry/socket/hostinet/socket.go
index bde4c7a1e..34f63986f 100644
--- a/pkg/sentry/socket/hostinet/socket.go
+++ b/pkg/sentry/socket/hostinet/socket.go
@@ -126,7 +126,7 @@ func (s *socketOperations) Read(ctx context.Context, _ *fs.File, dst usermem.IOS
}
return uint64(n), nil
}
- return readv(s.fd, iovecsFromBlockSeq(dsts))
+ return readv(s.fd, safemem.IovecsFromBlockSeq(dsts))
}))
return int64(n), err
}
@@ -149,7 +149,7 @@ func (s *socketOperations) Write(ctx context.Context, _ *fs.File, src usermem.IO
}
return uint64(n), nil
}
- return writev(s.fd, iovecsFromBlockSeq(srcs))
+ return writev(s.fd, safemem.IovecsFromBlockSeq(srcs))
}))
return int64(n), err
}
@@ -402,7 +402,7 @@ func (s *socketOperations) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags
// We always do a non-blocking recv*().
sysflags := flags | syscall.MSG_DONTWAIT
- iovs := iovecsFromBlockSeq(dsts)
+ iovs := safemem.IovecsFromBlockSeq(dsts)
msg := syscall.Msghdr{
Iov: &iovs[0],
Iovlen: uint64(len(iovs)),
@@ -522,7 +522,7 @@ func (s *socketOperations) SendMsg(t *kernel.Task, src usermem.IOSequence, to []
return uint64(n), nil
}
- iovs := iovecsFromBlockSeq(srcs)
+ iovs := safemem.IovecsFromBlockSeq(srcs)
msg := syscall.Msghdr{
Iov: &iovs[0],
Iovlen: uint64(len(iovs)),
@@ -567,21 +567,6 @@ func (s *socketOperations) SendMsg(t *kernel.Task, src usermem.IOSequence, to []
return int(n), syserr.FromError(err)
}
-func iovecsFromBlockSeq(bs safemem.BlockSeq) []syscall.Iovec {
- iovs := make([]syscall.Iovec, 0, bs.NumBlocks())
- for ; !bs.IsEmpty(); bs = bs.Tail() {
- b := bs.Head()
- iovs = append(iovs, syscall.Iovec{
- Base: &b.ToSlice()[0],
- Len: uint64(b.Len()),
- })
- // We don't need to care about b.NeedSafecopy(), because the host
- // kernel will handle such address ranges just fine (by returning
- // EFAULT).
- }
- return iovs
-}
-
func translateIOSyscallError(err error) error {
if err == syscall.EAGAIN || err == syscall.EWOULDBLOCK {
return syserror.ErrWouldBlock
diff --git a/pkg/sentry/socket/hostinet/stack.go b/pkg/sentry/socket/hostinet/stack.go
index 034eca676..a48082631 100644
--- a/pkg/sentry/socket/hostinet/stack.go
+++ b/pkg/sentry/socket/hostinet/stack.go
@@ -310,6 +310,11 @@ func (s *Stack) InterfaceAddrs() map[int32][]inet.InterfaceAddr {
return addrs
}
+// AddInterfaceAddr implements inet.Stack.AddInterfaceAddr.
+func (s *Stack) AddInterfaceAddr(idx int32, addr inet.InterfaceAddr) error {
+ return syserror.EACCES
+}
+
// SupportsIPv6 implements inet.Stack.SupportsIPv6.
func (s *Stack) SupportsIPv6() bool {
return s.supportsIPv6
diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go
index 8f14643b0..ea43a0ce3 100644
--- a/pkg/sentry/socket/netfilter/netfilter.go
+++ b/pkg/sentry/socket/netfilter/netfilter.go
@@ -48,8 +48,7 @@ type metadata struct {
Size uint32
}
-// nflog logs messages related to the writing and reading of iptables, but only
-// when enableDebug is true.
+// nflog logs messages related to the writing and reading of iptables.
func nflog(format string, args ...interface{}) {
log.Infof("netfilter: "+format, args...)
}
diff --git a/pkg/sentry/socket/netlink/BUILD b/pkg/sentry/socket/netlink/BUILD
index f8b8e467d..1911cd9b8 100644
--- a/pkg/sentry/socket/netlink/BUILD
+++ b/pkg/sentry/socket/netlink/BUILD
@@ -1,4 +1,4 @@
-load("//tools:defs.bzl", "go_library")
+load("//tools:defs.bzl", "go_library", "go_test")
package(licenses = ["notice"])
@@ -33,3 +33,15 @@ go_library(
"//pkg/waiter",
],
)
+
+go_test(
+ name = "netlink_test",
+ size = "small",
+ srcs = [
+ "message_test.go",
+ ],
+ deps = [
+ ":netlink",
+ "//pkg/abi/linux",
+ ],
+)
diff --git a/pkg/sentry/socket/netlink/message.go b/pkg/sentry/socket/netlink/message.go
index b21e0ca4b..4ea252ccb 100644
--- a/pkg/sentry/socket/netlink/message.go
+++ b/pkg/sentry/socket/netlink/message.go
@@ -30,8 +30,16 @@ func alignUp(length int, align uint) int {
return (length + int(align) - 1) &^ (int(align) - 1)
}
+// alignPad returns the length of padding required for alignment.
+//
+// Preconditions: align is a power of two.
+func alignPad(length int, align uint) int {
+ return alignUp(length, align) - length
+}
+
// Message contains a complete serialized netlink message.
type Message struct {
+ hdr linux.NetlinkMessageHeader
buf []byte
}
@@ -40,10 +48,86 @@ type Message struct {
// The header length will be updated by Finalize.
func NewMessage(hdr linux.NetlinkMessageHeader) *Message {
return &Message{
+ hdr: hdr,
buf: binary.Marshal(nil, usermem.ByteOrder, hdr),
}
}
+// ParseMessage parses the first message seen at buf, returning the rest of the
+// buffer. If message is malformed, ok of false is returned. For last message,
+// padding check is loose, if there isn't enought padding, whole buf is consumed
+// and ok is set to true.
+func ParseMessage(buf []byte) (msg *Message, rest []byte, ok bool) {
+ b := BytesView(buf)
+
+ hdrBytes, ok := b.Extract(linux.NetlinkMessageHeaderSize)
+ if !ok {
+ return
+ }
+ var hdr linux.NetlinkMessageHeader
+ binary.Unmarshal(hdrBytes, usermem.ByteOrder, &hdr)
+
+ // Msg portion.
+ totalMsgLen := int(hdr.Length)
+ _, ok = b.Extract(totalMsgLen - linux.NetlinkMessageHeaderSize)
+ if !ok {
+ return
+ }
+
+ // Padding.
+ numPad := alignPad(totalMsgLen, linux.NLMSG_ALIGNTO)
+ // Linux permits the last message not being aligned, just consume all of it.
+ // Ref: net/netlink/af_netlink.c:netlink_rcv_skb
+ if numPad > len(b) {
+ numPad = len(b)
+ }
+ _, ok = b.Extract(numPad)
+ if !ok {
+ return
+ }
+
+ return &Message{
+ hdr: hdr,
+ buf: buf[:totalMsgLen],
+ }, []byte(b), true
+}
+
+// Header returns the header of this message.
+func (m *Message) Header() linux.NetlinkMessageHeader {
+ return m.hdr
+}
+
+// GetData unmarshals the payload message header from this netlink message, and
+// returns the attributes portion.
+func (m *Message) GetData(msg interface{}) (AttrsView, bool) {
+ b := BytesView(m.buf)
+
+ _, ok := b.Extract(linux.NetlinkMessageHeaderSize)
+ if !ok {
+ return nil, false
+ }
+
+ size := int(binary.Size(msg))
+ msgBytes, ok := b.Extract(size)
+ if !ok {
+ return nil, false
+ }
+ binary.Unmarshal(msgBytes, usermem.ByteOrder, msg)
+
+ numPad := alignPad(linux.NetlinkMessageHeaderSize+size, linux.NLMSG_ALIGNTO)
+ // Linux permits the last message not being aligned, just consume all of it.
+ // Ref: net/netlink/af_netlink.c:netlink_rcv_skb
+ if numPad > len(b) {
+ numPad = len(b)
+ }
+ _, ok = b.Extract(numPad)
+ if !ok {
+ return nil, false
+ }
+
+ return AttrsView(b), true
+}
+
// Finalize returns the []byte containing the entire message, with the total
// length set in the message header. The Message must not be modified after
// calling Finalize.
@@ -157,3 +241,48 @@ func (ms *MessageSet) AddMessage(hdr linux.NetlinkMessageHeader) *Message {
ms.Messages = append(ms.Messages, m)
return m
}
+
+// AttrsView is a view into the attributes portion of a netlink message.
+type AttrsView []byte
+
+// Empty returns whether there is no attribute left in v.
+func (v AttrsView) Empty() bool {
+ return len(v) == 0
+}
+
+// ParseFirst parses first netlink attribute at the beginning of v.
+func (v AttrsView) ParseFirst() (hdr linux.NetlinkAttrHeader, value []byte, rest AttrsView, ok bool) {
+ b := BytesView(v)
+
+ hdrBytes, ok := b.Extract(linux.NetlinkAttrHeaderSize)
+ if !ok {
+ return
+ }
+ binary.Unmarshal(hdrBytes, usermem.ByteOrder, &hdr)
+
+ value, ok = b.Extract(int(hdr.Length) - linux.NetlinkAttrHeaderSize)
+ if !ok {
+ return
+ }
+
+ _, ok = b.Extract(alignPad(int(hdr.Length), linux.NLA_ALIGNTO))
+ if !ok {
+ return
+ }
+
+ return hdr, value, AttrsView(b), ok
+}
+
+// BytesView supports extracting data from a byte slice with bounds checking.
+type BytesView []byte
+
+// Extract removes the first n bytes from v and returns it. If n is out of
+// bounds, it returns false.
+func (v *BytesView) Extract(n int) ([]byte, bool) {
+ if n < 0 || n > len(*v) {
+ return nil, false
+ }
+ extracted := (*v)[:n]
+ *v = (*v)[n:]
+ return extracted, true
+}
diff --git a/pkg/sentry/socket/netlink/message_test.go b/pkg/sentry/socket/netlink/message_test.go
new file mode 100644
index 000000000..ef13d9386
--- /dev/null
+++ b/pkg/sentry/socket/netlink/message_test.go
@@ -0,0 +1,312 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package message_test
+
+import (
+ "bytes"
+ "reflect"
+ "testing"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/socket/netlink"
+)
+
+type dummyNetlinkMsg struct {
+ Foo uint16
+}
+
+func TestParseMessage(t *testing.T) {
+ tests := []struct {
+ desc string
+ input []byte
+
+ header linux.NetlinkMessageHeader
+ dataMsg *dummyNetlinkMsg
+ restLen int
+ ok bool
+ }{
+ {
+ desc: "valid",
+ input: []byte{
+ 0x14, 0x00, 0x00, 0x00, // Length
+ 0x01, 0x00, // Type
+ 0x02, 0x00, // Flags
+ 0x03, 0x00, 0x00, 0x00, // Seq
+ 0x04, 0x00, 0x00, 0x00, // PortID
+ 0x30, 0x31, 0x00, 0x00, // Data message with 2 bytes padding
+ },
+ header: linux.NetlinkMessageHeader{
+ Length: 20,
+ Type: 1,
+ Flags: 2,
+ Seq: 3,
+ PortID: 4,
+ },
+ dataMsg: &dummyNetlinkMsg{
+ Foo: 0x3130,
+ },
+ restLen: 0,
+ ok: true,
+ },
+ {
+ desc: "valid with next message",
+ input: []byte{
+ 0x14, 0x00, 0x00, 0x00, // Length
+ 0x01, 0x00, // Type
+ 0x02, 0x00, // Flags
+ 0x03, 0x00, 0x00, 0x00, // Seq
+ 0x04, 0x00, 0x00, 0x00, // PortID
+ 0x30, 0x31, 0x00, 0x00, // Data message with 2 bytes padding
+ 0xFF, // Next message (rest)
+ },
+ header: linux.NetlinkMessageHeader{
+ Length: 20,
+ Type: 1,
+ Flags: 2,
+ Seq: 3,
+ PortID: 4,
+ },
+ dataMsg: &dummyNetlinkMsg{
+ Foo: 0x3130,
+ },
+ restLen: 1,
+ ok: true,
+ },
+ {
+ desc: "valid for last message without padding",
+ input: []byte{
+ 0x12, 0x00, 0x00, 0x00, // Length
+ 0x01, 0x00, // Type
+ 0x02, 0x00, // Flags
+ 0x03, 0x00, 0x00, 0x00, // Seq
+ 0x04, 0x00, 0x00, 0x00, // PortID
+ 0x30, 0x31, // Data message
+ },
+ header: linux.NetlinkMessageHeader{
+ Length: 18,
+ Type: 1,
+ Flags: 2,
+ Seq: 3,
+ PortID: 4,
+ },
+ dataMsg: &dummyNetlinkMsg{
+ Foo: 0x3130,
+ },
+ restLen: 0,
+ ok: true,
+ },
+ {
+ desc: "valid for last message not to be aligned",
+ input: []byte{
+ 0x13, 0x00, 0x00, 0x00, // Length
+ 0x01, 0x00, // Type
+ 0x02, 0x00, // Flags
+ 0x03, 0x00, 0x00, 0x00, // Seq
+ 0x04, 0x00, 0x00, 0x00, // PortID
+ 0x30, 0x31, // Data message
+ 0x00, // Excessive 1 byte permitted at end
+ },
+ header: linux.NetlinkMessageHeader{
+ Length: 19,
+ Type: 1,
+ Flags: 2,
+ Seq: 3,
+ PortID: 4,
+ },
+ dataMsg: &dummyNetlinkMsg{
+ Foo: 0x3130,
+ },
+ restLen: 0,
+ ok: true,
+ },
+ {
+ desc: "header.Length too short",
+ input: []byte{
+ 0x04, 0x00, 0x00, 0x00, // Length
+ 0x01, 0x00, // Type
+ 0x02, 0x00, // Flags
+ 0x03, 0x00, 0x00, 0x00, // Seq
+ 0x04, 0x00, 0x00, 0x00, // PortID
+ 0x30, 0x31, 0x00, 0x00, // Data message with 2 bytes padding
+ },
+ ok: false,
+ },
+ {
+ desc: "header.Length too long",
+ input: []byte{
+ 0xFF, 0xFF, 0x00, 0x00, // Length
+ 0x01, 0x00, // Type
+ 0x02, 0x00, // Flags
+ 0x03, 0x00, 0x00, 0x00, // Seq
+ 0x04, 0x00, 0x00, 0x00, // PortID
+ 0x30, 0x31, 0x00, 0x00, // Data message with 2 bytes padding
+ },
+ ok: false,
+ },
+ {
+ desc: "header incomplete",
+ input: []byte{
+ 0x04, 0x00, 0x00, 0x00, // Length
+ },
+ ok: false,
+ },
+ {
+ desc: "empty message",
+ input: []byte{},
+ ok: false,
+ },
+ }
+ for _, test := range tests {
+ msg, rest, ok := netlink.ParseMessage(test.input)
+ if ok != test.ok {
+ t.Errorf("%v: got ok = %v, want = %v", test.desc, ok, test.ok)
+ continue
+ }
+ if !test.ok {
+ continue
+ }
+ if !reflect.DeepEqual(msg.Header(), test.header) {
+ t.Errorf("%v: got hdr = %+v, want = %+v", test.desc, msg.Header(), test.header)
+ }
+
+ dataMsg := &dummyNetlinkMsg{}
+ _, dataOk := msg.GetData(dataMsg)
+ if !dataOk {
+ t.Errorf("%v: GetData.ok = %v, want = true", test.desc, dataOk)
+ } else if !reflect.DeepEqual(dataMsg, test.dataMsg) {
+ t.Errorf("%v: GetData.msg = %+v, want = %+v", test.desc, dataMsg, test.dataMsg)
+ }
+
+ if got, want := rest, test.input[len(test.input)-test.restLen:]; !bytes.Equal(got, want) {
+ t.Errorf("%v: got rest = %v, want = %v", test.desc, got, want)
+ }
+ }
+}
+
+func TestAttrView(t *testing.T) {
+ tests := []struct {
+ desc string
+ input []byte
+
+ // Outputs for ParseFirst.
+ hdr linux.NetlinkAttrHeader
+ value []byte
+ restLen int
+ ok bool
+
+ // Outputs for Empty.
+ isEmpty bool
+ }{
+ {
+ desc: "valid",
+ input: []byte{
+ 0x06, 0x00, // Length
+ 0x01, 0x00, // Type
+ 0x30, 0x31, 0x00, 0x00, // Data with 2 bytes padding
+ },
+ hdr: linux.NetlinkAttrHeader{
+ Length: 6,
+ Type: 1,
+ },
+ value: []byte{0x30, 0x31},
+ restLen: 0,
+ ok: true,
+ isEmpty: false,
+ },
+ {
+ desc: "at alignment",
+ input: []byte{
+ 0x08, 0x00, // Length
+ 0x01, 0x00, // Type
+ 0x30, 0x31, 0x32, 0x33, // Data
+ },
+ hdr: linux.NetlinkAttrHeader{
+ Length: 8,
+ Type: 1,
+ },
+ value: []byte{0x30, 0x31, 0x32, 0x33},
+ restLen: 0,
+ ok: true,
+ isEmpty: false,
+ },
+ {
+ desc: "at alignment with rest data",
+ input: []byte{
+ 0x08, 0x00, // Length
+ 0x01, 0x00, // Type
+ 0x30, 0x31, 0x32, 0x33, // Data
+ 0xFF, 0xFE, // Rest data
+ },
+ hdr: linux.NetlinkAttrHeader{
+ Length: 8,
+ Type: 1,
+ },
+ value: []byte{0x30, 0x31, 0x32, 0x33},
+ restLen: 2,
+ ok: true,
+ isEmpty: false,
+ },
+ {
+ desc: "hdr.Length too long",
+ input: []byte{
+ 0xFF, 0x00, // Length
+ 0x01, 0x00, // Type
+ 0x30, 0x31, 0x32, 0x33, // Data
+ },
+ ok: false,
+ isEmpty: false,
+ },
+ {
+ desc: "hdr.Length too short",
+ input: []byte{
+ 0x01, 0x00, // Length
+ 0x01, 0x00, // Type
+ 0x30, 0x31, 0x32, 0x33, // Data
+ },
+ ok: false,
+ isEmpty: false,
+ },
+ {
+ desc: "empty",
+ input: []byte{},
+ ok: false,
+ isEmpty: true,
+ },
+ }
+ for _, test := range tests {
+ attrs := netlink.AttrsView(test.input)
+
+ // Test ParseFirst().
+ hdr, value, rest, ok := attrs.ParseFirst()
+ if ok != test.ok {
+ t.Errorf("%v: got ok = %v, want = %v", test.desc, ok, test.ok)
+ } else if test.ok {
+ if !reflect.DeepEqual(hdr, test.hdr) {
+ t.Errorf("%v: got hdr = %+v, want = %+v", test.desc, hdr, test.hdr)
+ }
+ if !bytes.Equal(value, test.value) {
+ t.Errorf("%v: got value = %v, want = %v", test.desc, value, test.value)
+ }
+ if wantRest := test.input[len(test.input)-test.restLen:]; !bytes.Equal(rest, wantRest) {
+ t.Errorf("%v: got rest = %v, want = %v", test.desc, rest, wantRest)
+ }
+ }
+
+ // Test Empty().
+ if got, want := attrs.Empty(), test.isEmpty; got != want {
+ t.Errorf("%v: got empty = %v, want = %v", test.desc, got, want)
+ }
+ }
+}
diff --git a/pkg/sentry/socket/netlink/provider.go b/pkg/sentry/socket/netlink/provider.go
index 07f860a49..b0dc70e5c 100644
--- a/pkg/sentry/socket/netlink/provider.go
+++ b/pkg/sentry/socket/netlink/provider.go
@@ -42,7 +42,7 @@ type Protocol interface {
// If err == nil, any messages added to ms will be sent back to the
// other end of the socket. Setting ms.Multi will cause an NLMSG_DONE
// message to be sent even if ms contains no messages.
- ProcessMessage(ctx context.Context, hdr linux.NetlinkMessageHeader, data []byte, ms *MessageSet) *syserr.Error
+ ProcessMessage(ctx context.Context, msg *Message, ms *MessageSet) *syserr.Error
}
// Provider is a function that creates a new Protocol for a specific netlink
diff --git a/pkg/sentry/socket/netlink/route/BUILD b/pkg/sentry/socket/netlink/route/BUILD
index 622a1eafc..93127398d 100644
--- a/pkg/sentry/socket/netlink/route/BUILD
+++ b/pkg/sentry/socket/netlink/route/BUILD
@@ -10,13 +10,11 @@ go_library(
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
- "//pkg/binary",
"//pkg/context",
"//pkg/sentry/inet",
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/socket/netlink",
"//pkg/syserr",
- "//pkg/usermem",
],
)
diff --git a/pkg/sentry/socket/netlink/route/protocol.go b/pkg/sentry/socket/netlink/route/protocol.go
index 2b3c7f5b3..c84d8bd7c 100644
--- a/pkg/sentry/socket/netlink/route/protocol.go
+++ b/pkg/sentry/socket/netlink/route/protocol.go
@@ -17,16 +17,15 @@ package route
import (
"bytes"
+ "syscall"
"gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/binary"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/inet"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/socket/netlink"
"gvisor.dev/gvisor/pkg/syserr"
- "gvisor.dev/gvisor/pkg/usermem"
)
// commandKind describes the operational class of a message type.
@@ -69,13 +68,7 @@ func (p *Protocol) CanSend() bool {
}
// dumpLinks handles RTM_GETLINK dump requests.
-func (p *Protocol) dumpLinks(ctx context.Context, hdr linux.NetlinkMessageHeader, data []byte, ms *netlink.MessageSet) *syserr.Error {
- // TODO(b/68878065): Only the dump variant of the types below are
- // supported.
- if hdr.Flags&linux.NLM_F_DUMP != linux.NLM_F_DUMP {
- return syserr.ErrNotSupported
- }
-
+func (p *Protocol) dumpLinks(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error {
// NLM_F_DUMP + RTM_GETLINK messages are supposed to include an
// ifinfomsg. However, Linux <3.9 only checked for rtgenmsg, and some
// userspace applications (including glibc) still include rtgenmsg.
@@ -99,44 +92,105 @@ func (p *Protocol) dumpLinks(ctx context.Context, hdr linux.NetlinkMessageHeader
return nil
}
- for id, i := range stack.Interfaces() {
- m := ms.AddMessage(linux.NetlinkMessageHeader{
- Type: linux.RTM_NEWLINK,
- })
+ for idx, i := range stack.Interfaces() {
+ addNewLinkMessage(ms, idx, i)
+ }
- m.Put(linux.InterfaceInfoMessage{
- Family: linux.AF_UNSPEC,
- Type: i.DeviceType,
- Index: id,
- Flags: i.Flags,
- })
+ return nil
+}
- m.PutAttrString(linux.IFLA_IFNAME, i.Name)
- m.PutAttr(linux.IFLA_MTU, i.MTU)
+// getLinks handles RTM_GETLINK requests.
+func (p *Protocol) getLink(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error {
+ stack := inet.StackFromContext(ctx)
+ if stack == nil {
+ // No network devices.
+ return nil
+ }
- mac := make([]byte, 6)
- brd := mac
- if len(i.Addr) > 0 {
- mac = i.Addr
- brd = bytes.Repeat([]byte{0xff}, len(i.Addr))
+ // Parse message.
+ var ifi linux.InterfaceInfoMessage
+ attrs, ok := msg.GetData(&ifi)
+ if !ok {
+ return syserr.ErrInvalidArgument
+ }
+
+ // Parse attributes.
+ var byName []byte
+ for !attrs.Empty() {
+ ahdr, value, rest, ok := attrs.ParseFirst()
+ if !ok {
+ return syserr.ErrInvalidArgument
}
- m.PutAttr(linux.IFLA_ADDRESS, mac)
- m.PutAttr(linux.IFLA_BROADCAST, brd)
+ attrs = rest
- // TODO(gvisor.dev/issue/578): There are many more attributes.
+ switch ahdr.Type {
+ case linux.IFLA_IFNAME:
+ if len(value) < 1 {
+ return syserr.ErrInvalidArgument
+ }
+ byName = value[:len(value)-1]
+
+ // TODO(gvisor.dev/issue/578): Support IFLA_EXT_MASK.
+ }
}
+ found := false
+ for idx, i := range stack.Interfaces() {
+ switch {
+ case ifi.Index > 0:
+ if idx != ifi.Index {
+ continue
+ }
+ case byName != nil:
+ if string(byName) != i.Name {
+ continue
+ }
+ default:
+ // Criteria not specified.
+ return syserr.ErrInvalidArgument
+ }
+
+ addNewLinkMessage(ms, idx, i)
+ found = true
+ break
+ }
+ if !found {
+ return syserr.ErrNoDevice
+ }
return nil
}
-// dumpAddrs handles RTM_GETADDR dump requests.
-func (p *Protocol) dumpAddrs(ctx context.Context, hdr linux.NetlinkMessageHeader, data []byte, ms *netlink.MessageSet) *syserr.Error {
- // TODO(b/68878065): Only the dump variant of the types below are
- // supported.
- if hdr.Flags&linux.NLM_F_DUMP != linux.NLM_F_DUMP {
- return syserr.ErrNotSupported
+// addNewLinkMessage appends RTM_NEWLINK message for the given interface into
+// the message set.
+func addNewLinkMessage(ms *netlink.MessageSet, idx int32, i inet.Interface) {
+ m := ms.AddMessage(linux.NetlinkMessageHeader{
+ Type: linux.RTM_NEWLINK,
+ })
+
+ m.Put(linux.InterfaceInfoMessage{
+ Family: linux.AF_UNSPEC,
+ Type: i.DeviceType,
+ Index: idx,
+ Flags: i.Flags,
+ })
+
+ m.PutAttrString(linux.IFLA_IFNAME, i.Name)
+ m.PutAttr(linux.IFLA_MTU, i.MTU)
+
+ mac := make([]byte, 6)
+ brd := mac
+ if len(i.Addr) > 0 {
+ mac = i.Addr
+ brd = bytes.Repeat([]byte{0xff}, len(i.Addr))
}
+ m.PutAttr(linux.IFLA_ADDRESS, mac)
+ m.PutAttr(linux.IFLA_BROADCAST, brd)
+
+ // TODO(gvisor.dev/issue/578): There are many more attributes.
+}
+// dumpAddrs handles RTM_GETADDR dump requests.
+func (p *Protocol) dumpAddrs(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error {
// RTM_GETADDR dump requests need not contain anything more than the
// netlink header and 1 byte protocol family common to all
// NETLINK_ROUTE requests.
@@ -168,6 +222,7 @@ func (p *Protocol) dumpAddrs(ctx context.Context, hdr linux.NetlinkMessageHeader
Index: uint32(id),
})
+ m.PutAttr(linux.IFA_LOCAL, []byte(a.Addr))
m.PutAttr(linux.IFA_ADDRESS, []byte(a.Addr))
// TODO(gvisor.dev/issue/578): There are many more attributes.
@@ -252,12 +307,12 @@ func fillRoute(routes []inet.Route, addr []byte) (inet.Route, *syserr.Error) {
}
// parseForDestination parses a message as format of RouteMessage-RtAttr-dst.
-func parseForDestination(data []byte) ([]byte, *syserr.Error) {
+func parseForDestination(msg *netlink.Message) ([]byte, *syserr.Error) {
var rtMsg linux.RouteMessage
- if len(data) < linux.SizeOfRouteMessage {
+ attrs, ok := msg.GetData(&rtMsg)
+ if !ok {
return nil, syserr.ErrInvalidArgument
}
- binary.Unmarshal(data[:linux.SizeOfRouteMessage], usermem.ByteOrder, &rtMsg)
// iproute2 added the RTM_F_LOOKUP_TABLE flag in version v4.4.0. See
// commit bc234301af12. Note we don't check this flag for backward
// compatibility.
@@ -265,26 +320,15 @@ func parseForDestination(data []byte) ([]byte, *syserr.Error) {
return nil, syserr.ErrNotSupported
}
- data = data[linux.SizeOfRouteMessage:]
-
- // TODO(gvisor.dev/issue/1611): Add generic attribute parsing.
- var rtAttr linux.RtAttr
- if len(data) < linux.SizeOfRtAttr {
- return nil, syserr.ErrInvalidArgument
+ // Expect first attribute is RTA_DST.
+ if hdr, value, _, ok := attrs.ParseFirst(); ok && hdr.Type == linux.RTA_DST {
+ return value, nil
}
- binary.Unmarshal(data[:linux.SizeOfRtAttr], usermem.ByteOrder, &rtAttr)
- if rtAttr.Type != linux.RTA_DST {
- return nil, syserr.ErrInvalidArgument
- }
-
- if len(data) < int(rtAttr.Len) {
- return nil, syserr.ErrInvalidArgument
- }
- return data[linux.SizeOfRtAttr:rtAttr.Len], nil
+ return nil, syserr.ErrInvalidArgument
}
// dumpRoutes handles RTM_GETROUTE requests.
-func (p *Protocol) dumpRoutes(ctx context.Context, hdr linux.NetlinkMessageHeader, data []byte, ms *netlink.MessageSet) *syserr.Error {
+func (p *Protocol) dumpRoutes(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error {
// RTM_GETROUTE dump requests need not contain anything more than the
// netlink header and 1 byte protocol family common to all
// NETLINK_ROUTE requests.
@@ -295,10 +339,11 @@ func (p *Protocol) dumpRoutes(ctx context.Context, hdr linux.NetlinkMessageHeade
return nil
}
+ hdr := msg.Header()
routeTables := stack.RouteTable()
if hdr.Flags == linux.NLM_F_REQUEST {
- dst, err := parseForDestination(data)
+ dst, err := parseForDestination(msg)
if err != nil {
return err
}
@@ -357,10 +402,55 @@ func (p *Protocol) dumpRoutes(ctx context.Context, hdr linux.NetlinkMessageHeade
return nil
}
+// newAddr handles RTM_NEWADDR requests.
+func (p *Protocol) newAddr(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error {
+ stack := inet.StackFromContext(ctx)
+ if stack == nil {
+ // No network stack.
+ return syserr.ErrProtocolNotSupported
+ }
+
+ var ifa linux.InterfaceAddrMessage
+ attrs, ok := msg.GetData(&ifa)
+ if !ok {
+ return syserr.ErrInvalidArgument
+ }
+
+ for !attrs.Empty() {
+ ahdr, value, rest, ok := attrs.ParseFirst()
+ if !ok {
+ return syserr.ErrInvalidArgument
+ }
+ attrs = rest
+
+ switch ahdr.Type {
+ case linux.IFA_LOCAL:
+ err := stack.AddInterfaceAddr(int32(ifa.Index), inet.InterfaceAddr{
+ Family: ifa.Family,
+ PrefixLen: ifa.PrefixLen,
+ Flags: ifa.Flags,
+ Addr: value,
+ })
+ if err == syscall.EEXIST {
+ flags := msg.Header().Flags
+ if flags&linux.NLM_F_EXCL != 0 {
+ return syserr.ErrExists
+ }
+ } else if err != nil {
+ return syserr.ErrInvalidArgument
+ }
+ }
+ }
+ return nil
+}
+
// ProcessMessage implements netlink.Protocol.ProcessMessage.
-func (p *Protocol) ProcessMessage(ctx context.Context, hdr linux.NetlinkMessageHeader, data []byte, ms *netlink.MessageSet) *syserr.Error {
+func (p *Protocol) ProcessMessage(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error {
+ hdr := msg.Header()
+
// All messages start with a 1 byte protocol family.
- if len(data) < 1 {
+ var family uint8
+ if _, ok := msg.GetData(&family); !ok {
// Linux ignores messages missing the protocol family. See
// net/core/rtnetlink.c:rtnetlink_rcv_msg.
return nil
@@ -374,16 +464,32 @@ func (p *Protocol) ProcessMessage(ctx context.Context, hdr linux.NetlinkMessageH
}
}
- switch hdr.Type {
- case linux.RTM_GETLINK:
- return p.dumpLinks(ctx, hdr, data, ms)
- case linux.RTM_GETADDR:
- return p.dumpAddrs(ctx, hdr, data, ms)
- case linux.RTM_GETROUTE:
- return p.dumpRoutes(ctx, hdr, data, ms)
- default:
- return syserr.ErrNotSupported
+ if hdr.Flags&linux.NLM_F_DUMP == linux.NLM_F_DUMP {
+ // TODO(b/68878065): Only the dump variant of the types below are
+ // supported.
+ switch hdr.Type {
+ case linux.RTM_GETLINK:
+ return p.dumpLinks(ctx, msg, ms)
+ case linux.RTM_GETADDR:
+ return p.dumpAddrs(ctx, msg, ms)
+ case linux.RTM_GETROUTE:
+ return p.dumpRoutes(ctx, msg, ms)
+ default:
+ return syserr.ErrNotSupported
+ }
+ } else if hdr.Flags&linux.NLM_F_REQUEST == linux.NLM_F_REQUEST {
+ switch hdr.Type {
+ case linux.RTM_GETLINK:
+ return p.getLink(ctx, msg, ms)
+ case linux.RTM_GETROUTE:
+ return p.dumpRoutes(ctx, msg, ms)
+ case linux.RTM_NEWADDR:
+ return p.newAddr(ctx, msg, ms)
+ default:
+ return syserr.ErrNotSupported
+ }
}
+ return syserr.ErrNotSupported
}
// init registers the NETLINK_ROUTE provider.
diff --git a/pkg/sentry/socket/netlink/socket.go b/pkg/sentry/socket/netlink/socket.go
index c4b95debb..2ca02567d 100644
--- a/pkg/sentry/socket/netlink/socket.go
+++ b/pkg/sentry/socket/netlink/socket.go
@@ -644,47 +644,38 @@ func (s *Socket) sendResponse(ctx context.Context, ms *MessageSet) *syserr.Error
return nil
}
-func (s *Socket) dumpErrorMesage(ctx context.Context, hdr linux.NetlinkMessageHeader, ms *MessageSet, err *syserr.Error) *syserr.Error {
+func dumpErrorMesage(hdr linux.NetlinkMessageHeader, ms *MessageSet, err *syserr.Error) {
m := ms.AddMessage(linux.NetlinkMessageHeader{
Type: linux.NLMSG_ERROR,
})
-
m.Put(linux.NetlinkErrorMessage{
Error: int32(-err.ToLinux().Number()),
Header: hdr,
})
- return nil
+}
+func dumpAckMesage(hdr linux.NetlinkMessageHeader, ms *MessageSet) {
+ m := ms.AddMessage(linux.NetlinkMessageHeader{
+ Type: linux.NLMSG_ERROR,
+ })
+ m.Put(linux.NetlinkErrorMessage{
+ Error: 0,
+ Header: hdr,
+ })
}
// processMessages handles each message in buf, passing it to the protocol
// handler for final handling.
func (s *Socket) processMessages(ctx context.Context, buf []byte) *syserr.Error {
for len(buf) > 0 {
- if len(buf) < linux.NetlinkMessageHeaderSize {
+ msg, rest, ok := ParseMessage(buf)
+ if !ok {
// Linux ignores messages that are too short. See
// net/netlink/af_netlink.c:netlink_rcv_skb.
break
}
-
- var hdr linux.NetlinkMessageHeader
- binary.Unmarshal(buf[:linux.NetlinkMessageHeaderSize], usermem.ByteOrder, &hdr)
-
- if hdr.Length < linux.NetlinkMessageHeaderSize || uint64(hdr.Length) > uint64(len(buf)) {
- // Linux ignores malformed messages. See
- // net/netlink/af_netlink.c:netlink_rcv_skb.
- break
- }
-
- // Data from this message.
- data := buf[linux.NetlinkMessageHeaderSize:hdr.Length]
-
- // Advance to the next message.
- next := alignUp(int(hdr.Length), linux.NLMSG_ALIGNTO)
- if next >= len(buf)-1 {
- next = len(buf) - 1
- }
- buf = buf[next:]
+ buf = rest
+ hdr := msg.Header()
// Ignore control messages.
if hdr.Type < linux.NLMSG_MIN_TYPE {
@@ -692,19 +683,10 @@ func (s *Socket) processMessages(ctx context.Context, buf []byte) *syserr.Error
}
ms := NewMessageSet(s.portID, hdr.Seq)
- var err *syserr.Error
- // TODO(b/68877377): ACKs not supported yet.
- if hdr.Flags&linux.NLM_F_ACK == linux.NLM_F_ACK {
- err = syserr.ErrNotSupported
- } else {
-
- err = s.protocol.ProcessMessage(ctx, hdr, data, ms)
- }
- if err != nil {
- ms = NewMessageSet(s.portID, hdr.Seq)
- if err := s.dumpErrorMesage(ctx, hdr, ms, err); err != nil {
- return err
- }
+ if err := s.protocol.ProcessMessage(ctx, msg, ms); err != nil {
+ dumpErrorMesage(hdr, ms, err)
+ } else if hdr.Flags&linux.NLM_F_ACK == linux.NLM_F_ACK {
+ dumpAckMesage(hdr, ms)
}
if err := s.sendResponse(ctx, ms); err != nil {
diff --git a/pkg/sentry/socket/netlink/uevent/protocol.go b/pkg/sentry/socket/netlink/uevent/protocol.go
index 1ee4296bc..029ba21b5 100644
--- a/pkg/sentry/socket/netlink/uevent/protocol.go
+++ b/pkg/sentry/socket/netlink/uevent/protocol.go
@@ -49,7 +49,7 @@ func (p *Protocol) CanSend() bool {
}
// ProcessMessage implements netlink.Protocol.ProcessMessage.
-func (p *Protocol) ProcessMessage(ctx context.Context, hdr linux.NetlinkMessageHeader, data []byte, ms *netlink.MessageSet) *syserr.Error {
+func (p *Protocol) ProcessMessage(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error {
// Silently ignore all messages.
return nil
}
diff --git a/pkg/sentry/socket/netstack/stack.go b/pkg/sentry/socket/netstack/stack.go
index 31ea66eca..0692482e9 100644
--- a/pkg/sentry/socket/netstack/stack.go
+++ b/pkg/sentry/socket/netstack/stack.go
@@ -20,6 +20,8 @@ import (
"gvisor.dev/gvisor/pkg/sentry/inet"
"gvisor.dev/gvisor/pkg/sentry/socket/netfilter"
"gvisor.dev/gvisor/pkg/syserr"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/tcpip/iptables"
"gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
@@ -88,6 +90,59 @@ func (s *Stack) InterfaceAddrs() map[int32][]inet.InterfaceAddr {
return nicAddrs
}
+// AddInterfaceAddr implements inet.Stack.AddInterfaceAddr.
+func (s *Stack) AddInterfaceAddr(idx int32, addr inet.InterfaceAddr) error {
+ var (
+ protocol tcpip.NetworkProtocolNumber
+ address tcpip.Address
+ )
+ switch addr.Family {
+ case linux.AF_INET:
+ if len(addr.Addr) < header.IPv4AddressSize {
+ return syserror.EINVAL
+ }
+ if addr.PrefixLen > header.IPv4AddressSize*8 {
+ return syserror.EINVAL
+ }
+ protocol = ipv4.ProtocolNumber
+ address = tcpip.Address(addr.Addr[:header.IPv4AddressSize])
+
+ case linux.AF_INET6:
+ if len(addr.Addr) < header.IPv6AddressSize {
+ return syserror.EINVAL
+ }
+ if addr.PrefixLen > header.IPv6AddressSize*8 {
+ return syserror.EINVAL
+ }
+ protocol = ipv6.ProtocolNumber
+ address = tcpip.Address(addr.Addr[:header.IPv6AddressSize])
+
+ default:
+ return syserror.ENOTSUP
+ }
+
+ protocolAddress := tcpip.ProtocolAddress{
+ Protocol: protocol,
+ AddressWithPrefix: tcpip.AddressWithPrefix{
+ Address: address,
+ PrefixLen: int(addr.PrefixLen),
+ },
+ }
+
+ // Attach address to interface.
+ if err := s.Stack.AddProtocolAddressWithOptions(tcpip.NICID(idx), protocolAddress, stack.CanBePrimaryEndpoint); err != nil {
+ return syserr.TranslateNetstackError(err).ToError()
+ }
+
+ // Add route for local network.
+ s.Stack.AddRoute(tcpip.Route{
+ Destination: protocolAddress.AddressWithPrefix.Subnet(),
+ Gateway: "", // No gateway for local network.
+ NIC: tcpip.NICID(idx),
+ })
+ return nil
+}
+
// TCPReceiveBufferSize implements inet.Stack.TCPReceiveBufferSize.
func (s *Stack) TCPReceiveBufferSize() (inet.TCPBufferSize, error) {
var rs tcp.ReceiveBufferSizeOption
diff --git a/pkg/sentry/syscalls/linux/linux64_amd64.go b/pkg/sentry/syscalls/linux/linux64_amd64.go
index 7435b50bf..588f8b087 100644
--- a/pkg/sentry/syscalls/linux/linux64_amd64.go
+++ b/pkg/sentry/syscalls/linux/linux64_amd64.go
@@ -228,18 +228,21 @@ var AMD64 = &kernel.SyscallTable{
185: syscalls.Error("security", syserror.ENOSYS, "Not implemented in Linux.", nil),
186: syscalls.Supported("gettid", Gettid),
187: syscalls.Supported("readahead", Readahead),
- 188: syscalls.PartiallySupported("setxattr", SetXattr, "Only supported for tmpfs.", nil),
- 189: syscalls.PartiallySupported("lsetxattr", LSetXattr, "Only supported for tmpfs.", nil),
- 190: syscalls.PartiallySupported("fsetxattr", FSetXattr, "Only supported for tmpfs.", nil),
- 191: syscalls.PartiallySupported("getxattr", GetXattr, "Only supported for tmpfs.", nil),
- 192: syscalls.PartiallySupported("lgetxattr", LGetXattr, "Only supported for tmpfs.", nil),
- 193: syscalls.PartiallySupported("fgetxattr", FGetXattr, "Only supported for tmpfs.", nil),
- 194: syscalls.ErrorWithEvent("listxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
- 195: syscalls.ErrorWithEvent("llistxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
- 196: syscalls.ErrorWithEvent("flistxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
- 197: syscalls.ErrorWithEvent("removexattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
- 198: syscalls.ErrorWithEvent("lremovexattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
- 199: syscalls.ErrorWithEvent("fremovexattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
+ // TODO(b/148303075): Enable set/getxattr (in their various
+ // forms) once we also have list and removexattr. The JVM
+ // assumes that if get/set exist, then list and remove do too.
+ 188: syscalls.ErrorWithEvent("setxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
+ 189: syscalls.ErrorWithEvent("lsetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
+ 190: syscalls.ErrorWithEvent("fsetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
+ 191: syscalls.ErrorWithEvent("getxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
+ 192: syscalls.ErrorWithEvent("lgetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
+ 193: syscalls.ErrorWithEvent("fgetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
+ 194: syscalls.ErrorWithEvent("listxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
+ 195: syscalls.ErrorWithEvent("llistxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
+ 196: syscalls.ErrorWithEvent("flistxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
+ 197: syscalls.ErrorWithEvent("removexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
+ 198: syscalls.ErrorWithEvent("lremovexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
+ 199: syscalls.ErrorWithEvent("fremovexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
200: syscalls.Supported("tkill", Tkill),
201: syscalls.Supported("time", Time),
202: syscalls.PartiallySupported("futex", Futex, "Robust futexes not supported.", nil),
diff --git a/pkg/sentry/syscalls/linux/linux64_arm64.go b/pkg/sentry/syscalls/linux/linux64_arm64.go
index 03a39fe65..06e5ee401 100644
--- a/pkg/sentry/syscalls/linux/linux64_arm64.go
+++ b/pkg/sentry/syscalls/linux/linux64_arm64.go
@@ -36,23 +36,26 @@ var ARM64 = &kernel.SyscallTable{
},
AuditNumber: linux.AUDIT_ARCH_AARCH64,
Table: map[uintptr]kernel.Syscall{
- 0: syscalls.PartiallySupported("io_setup", IoSetup, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
- 1: syscalls.PartiallySupported("io_destroy", IoDestroy, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
- 2: syscalls.PartiallySupported("io_submit", IoSubmit, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
- 3: syscalls.PartiallySupported("io_cancel", IoCancel, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
- 4: syscalls.PartiallySupported("io_getevents", IoGetevents, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
- 5: syscalls.PartiallySupported("setxattr", SetXattr, "Only supported for tmpfs.", nil),
- 6: syscalls.PartiallySupported("lsetxattr", LSetXattr, "Only supported for tmpfs.", nil),
- 7: syscalls.PartiallySupported("fsetxattr", FSetXattr, "Only supported for tmpfs.", nil),
- 8: syscalls.PartiallySupported("getxattr", GetXattr, "Only supported for tmpfs.", nil),
- 9: syscalls.PartiallySupported("lgetxattr", LGetXattr, "Only supported for tmpfs.", nil),
- 10: syscalls.PartiallySupported("fgetxattr", FGetXattr, "Only supported for tmpfs.", nil),
- 11: syscalls.ErrorWithEvent("listxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
- 12: syscalls.ErrorWithEvent("llistxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
- 13: syscalls.ErrorWithEvent("flistxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
- 14: syscalls.ErrorWithEvent("removexattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
- 15: syscalls.ErrorWithEvent("lremovexattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
- 16: syscalls.ErrorWithEvent("fremovexattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
+ 0: syscalls.PartiallySupported("io_setup", IoSetup, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
+ 1: syscalls.PartiallySupported("io_destroy", IoDestroy, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
+ 2: syscalls.PartiallySupported("io_submit", IoSubmit, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
+ 3: syscalls.PartiallySupported("io_cancel", IoCancel, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
+ 4: syscalls.PartiallySupported("io_getevents", IoGetevents, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
+ // TODO(b/148303075): Enable set/getxattr (in their various
+ // forms) once we also have list and removexattr. The JVM
+ // assumes that if get/set exist, then list and remove do too.
+ 5: syscalls.ErrorWithEvent("setxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
+ 6: syscalls.ErrorWithEvent("lsetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
+ 7: syscalls.ErrorWithEvent("fsetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
+ 8: syscalls.ErrorWithEvent("getxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
+ 9: syscalls.ErrorWithEvent("lgetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
+ 10: syscalls.ErrorWithEvent("fgetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
+ 11: syscalls.ErrorWithEvent("listxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
+ 13: syscalls.ErrorWithEvent("llistxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
+ 13: syscalls.ErrorWithEvent("flistxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
+ 14: syscalls.ErrorWithEvent("removexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
+ 15: syscalls.ErrorWithEvent("lremovexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
+ 16: syscalls.ErrorWithEvent("fremovexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
17: syscalls.Supported("getcwd", Getcwd),
18: syscalls.CapError("lookup_dcookie", linux.CAP_SYS_ADMIN, "", nil),
19: syscalls.Supported("eventfd2", Eventfd2),
diff --git a/pkg/sentry/syscalls/linux/sys_timer.go b/pkg/sentry/syscalls/linux/sys_timer.go
index 432351917..a4c400f87 100644
--- a/pkg/sentry/syscalls/linux/sys_timer.go
+++ b/pkg/sentry/syscalls/linux/sys_timer.go
@@ -146,7 +146,7 @@ func TimerCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S
return 0, nil, err
}
- return uintptr(id), nil, nil
+ return 0, nil, nil
}
// TimerSettime implements linux syscall timer_settime(2).
diff --git a/pkg/sentry/vfs/lock/BUILD b/pkg/sentry/vfs/lock/BUILD
new file mode 100644
index 000000000..d9ab063b7
--- /dev/null
+++ b/pkg/sentry/vfs/lock/BUILD
@@ -0,0 +1,13 @@
+load("//tools:defs.bzl", "go_library")
+
+package(licenses = ["notice"])
+
+go_library(
+ name = "lock",
+ srcs = ["lock.go"],
+ visibility = ["//pkg/sentry:internal"],
+ deps = [
+ "//pkg/sentry/fs/lock",
+ "//pkg/syserror",
+ ],
+)
diff --git a/pkg/sentry/vfs/lock/lock.go b/pkg/sentry/vfs/lock/lock.go
new file mode 100644
index 000000000..724dfe743
--- /dev/null
+++ b/pkg/sentry/vfs/lock/lock.go
@@ -0,0 +1,72 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package lock provides POSIX and BSD style file locking for VFS2 file
+// implementations.
+//
+// The actual implementations can be found in the lock package under
+// sentry/fs/lock.
+package lock
+
+import (
+ fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
+ "gvisor.dev/gvisor/pkg/syserror"
+)
+
+// FileLocks supports POSIX and BSD style locks, which correspond to fcntl(2)
+// and flock(2) respectively in Linux. It can be embedded into various file
+// implementations for VFS2 that support locking.
+//
+// Note that in Linux these two types of locks are _not_ cooperative, because
+// race and deadlock conditions make merging them prohibitive. We do the same
+// and keep them oblivious to each other.
+type FileLocks struct {
+ // bsd is a set of BSD-style advisory file wide locks, see flock(2).
+ bsd fslock.Locks
+
+ // posix is a set of POSIX-style regional advisory locks, see fcntl(2).
+ posix fslock.Locks
+}
+
+// LockBSD tries to acquire a BSD-style lock on the entire file.
+func (fl *FileLocks) LockBSD(uid fslock.UniqueID, t fslock.LockType, block fslock.Blocker) error {
+ if fl.bsd.LockRegion(uid, t, fslock.LockRange{0, fslock.LockEOF}, block) {
+ return nil
+ }
+ return syserror.ErrWouldBlock
+}
+
+// UnlockBSD releases a BSD-style lock on the entire file.
+//
+// This operation is always successful, even if there did not exist a lock on
+// the requested region held by uid in the first place.
+func (fl *FileLocks) UnlockBSD(uid fslock.UniqueID) {
+ fl.bsd.UnlockRegion(uid, fslock.LockRange{0, fslock.LockEOF})
+}
+
+// LockPOSIX tries to acquire a POSIX-style lock on a file region.
+func (fl *FileLocks) LockPOSIX(uid fslock.UniqueID, t fslock.LockType, rng fslock.LockRange, block fslock.Blocker) error {
+ if fl.posix.LockRegion(uid, t, rng, block) {
+ return nil
+ }
+ return syserror.ErrWouldBlock
+}
+
+// UnlockPOSIX releases a POSIX-style lock on a file region.
+//
+// This operation is always successful, even if there did not exist a lock on
+// the requested region held by uid in the first place.
+func (fl *FileLocks) UnlockPOSIX(uid fslock.UniqueID, rng fslock.LockRange) {
+ fl.posix.UnlockRegion(uid, rng)
+}
diff --git a/pkg/sentry/vfs/mount.go b/pkg/sentry/vfs/mount.go
index d39528051..1fbb420f9 100644
--- a/pkg/sentry/vfs/mount.go
+++ b/pkg/sentry/vfs/mount.go
@@ -423,7 +423,8 @@ func (mntns *MountNamespace) IncRef() {
}
// DecRef decrements mntns' reference count.
-func (mntns *MountNamespace) DecRef(vfs *VirtualFilesystem) {
+func (mntns *MountNamespace) DecRef() {
+ vfs := mntns.root.fs.VirtualFilesystem()
if refs := atomic.AddInt64(&mntns.refs, -1); refs == 0 {
vfs.mountMu.Lock()
vfs.mounts.seq.BeginWrite()
diff --git a/pkg/tcpip/header/eth.go b/pkg/tcpip/header/eth.go
index f5d2c127f..b1e92d2d7 100644
--- a/pkg/tcpip/header/eth.go
+++ b/pkg/tcpip/header/eth.go
@@ -134,3 +134,44 @@ func IsValidUnicastEthernetAddress(addr tcpip.LinkAddress) bool {
// addr is a valid unicast ethernet address.
return true
}
+
+// EthernetAddressFromMulticastIPv4Address returns a multicast Ethernet address
+// for a multicast IPv4 address.
+//
+// addr MUST be a multicast IPv4 address.
+func EthernetAddressFromMulticastIPv4Address(addr tcpip.Address) tcpip.LinkAddress {
+ var linkAddrBytes [EthernetAddressSize]byte
+ // RFC 1112 Host Extensions for IP Multicasting
+ //
+ // 6.4. Extensions to an Ethernet Local Network Module:
+ //
+ // An IP host group address is mapped to an Ethernet multicast
+ // address by placing the low-order 23-bits of the IP address
+ // into the low-order 23 bits of the Ethernet multicast address
+ // 01-00-5E-00-00-00 (hex).
+ linkAddrBytes[0] = 0x1
+ linkAddrBytes[2] = 0x5e
+ linkAddrBytes[3] = addr[1] & 0x7F
+ copy(linkAddrBytes[4:], addr[IPv4AddressSize-2:])
+ return tcpip.LinkAddress(linkAddrBytes[:])
+}
+
+// EthernetAddressFromMulticastIPv6Address returns a multicast Ethernet address
+// for a multicast IPv6 address.
+//
+// addr MUST be a multicast IPv6 address.
+func EthernetAddressFromMulticastIPv6Address(addr tcpip.Address) tcpip.LinkAddress {
+ // RFC 2464 Transmission of IPv6 Packets over Ethernet Networks
+ //
+ // 7. Address Mapping -- Multicast
+ //
+ // An IPv6 packet with a multicast destination address DST,
+ // consisting of the sixteen octets DST[1] through DST[16], is
+ // transmitted to the Ethernet multicast address whose first
+ // two octets are the value 3333 hexadecimal and whose last
+ // four octets are the last four octets of DST.
+ linkAddrBytes := []byte(addr[IPv6AddressSize-EthernetAddressSize:])
+ linkAddrBytes[0] = 0x33
+ linkAddrBytes[1] = 0x33
+ return tcpip.LinkAddress(linkAddrBytes[:])
+}
diff --git a/pkg/tcpip/header/eth_test.go b/pkg/tcpip/header/eth_test.go
index 6634c90f5..7a0014ad9 100644
--- a/pkg/tcpip/header/eth_test.go
+++ b/pkg/tcpip/header/eth_test.go
@@ -66,3 +66,37 @@ func TestIsValidUnicastEthernetAddress(t *testing.T) {
})
}
}
+
+func TestEthernetAddressFromMulticastIPv4Address(t *testing.T) {
+ tests := []struct {
+ name string
+ addr tcpip.Address
+ expectedLinkAddr tcpip.LinkAddress
+ }{
+ {
+ name: "IPv4 Multicast without 24th bit set",
+ addr: "\xe0\x7e\xdc\xba",
+ expectedLinkAddr: "\x01\x00\x5e\x7e\xdc\xba",
+ },
+ {
+ name: "IPv4 Multicast with 24th bit set",
+ addr: "\xe0\xfe\xdc\xba",
+ expectedLinkAddr: "\x01\x00\x5e\x7e\xdc\xba",
+ },
+ }
+
+ for _, test := range tests {
+ t.Run(test.name, func(t *testing.T) {
+ if got := EthernetAddressFromMulticastIPv4Address(test.addr); got != test.expectedLinkAddr {
+ t.Fatalf("got EthernetAddressFromMulticastIPv4Address(%s) = %s, want = %s", got, test.expectedLinkAddr)
+ }
+ })
+ }
+}
+
+func TestEthernetAddressFromMulticastIPv6Address(t *testing.T) {
+ addr := tcpip.Address("\xff\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x1a")
+ if got, want := EthernetAddressFromMulticastIPv6Address(addr), tcpip.LinkAddress("\x33\x33\x0d\x0e\x0f\x1a"); got != want {
+ t.Fatalf("got EthernetAddressFromMulticastIPv6Address(%s) = %s, want = %s", addr, got, want)
+ }
+}
diff --git a/pkg/tcpip/header/ipv6_test.go b/pkg/tcpip/header/ipv6_test.go
index 29f54bc57..c3ad503aa 100644
--- a/pkg/tcpip/header/ipv6_test.go
+++ b/pkg/tcpip/header/ipv6_test.go
@@ -17,6 +17,7 @@ package header_test
import (
"bytes"
"crypto/sha256"
+ "fmt"
"testing"
"github.com/google/go-cmp/cmp"
@@ -300,3 +301,31 @@ func TestScopeForIPv6Address(t *testing.T) {
})
}
}
+
+func TestSolicitedNodeAddr(t *testing.T) {
+ tests := []struct {
+ addr tcpip.Address
+ want tcpip.Address
+ }{
+ {
+ addr: "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\xa0",
+ want: "\xff\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\xff\x0e\x0f\xa0",
+ },
+ {
+ addr: "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\xdd\x0e\x0f\xa0",
+ want: "\xff\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\xff\x0e\x0f\xa0",
+ },
+ {
+ addr: "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\xdd\x01\x02\x03",
+ want: "\xff\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\xff\x01\x02\x03",
+ },
+ }
+
+ for _, test := range tests {
+ t.Run(fmt.Sprintf("%s", test.addr), func(t *testing.T) {
+ if got := header.SolicitedNodeAddr(test.addr); got != test.want {
+ t.Fatalf("got header.SolicitedNodeAddr(%s) = %s, want = %s", test.addr, got, test.want)
+ }
+ })
+ }
+}
diff --git a/pkg/tcpip/iptables/types.go b/pkg/tcpip/iptables/types.go
index d660aab04..7d593c35c 100644
--- a/pkg/tcpip/iptables/types.go
+++ b/pkg/tcpip/iptables/types.go
@@ -132,7 +132,7 @@ type Table struct {
// ValidHooks returns a bitmap of the builtin hooks for the given table.
func (table *Table) ValidHooks() uint32 {
hooks := uint32(0)
- for hook, _ := range table.BuiltinChains {
+ for hook := range table.BuiltinChains {
hooks |= 1 << hook
}
return hooks
diff --git a/pkg/tcpip/link/channel/channel.go b/pkg/tcpip/link/channel/channel.go
index 71b9da797..78d447acd 100644
--- a/pkg/tcpip/link/channel/channel.go
+++ b/pkg/tcpip/link/channel/channel.go
@@ -30,15 +30,16 @@ type PacketInfo struct {
Pkt tcpip.PacketBuffer
Proto tcpip.NetworkProtocolNumber
GSO *stack.GSO
+ Route stack.Route
}
// Endpoint is link layer endpoint that stores outbound packets in a channel
// and allows injection of inbound packets.
type Endpoint struct {
- dispatcher stack.NetworkDispatcher
- mtu uint32
- linkAddr tcpip.LinkAddress
- GSO bool
+ dispatcher stack.NetworkDispatcher
+ mtu uint32
+ linkAddr tcpip.LinkAddress
+ LinkEPCapabilities stack.LinkEndpointCapabilities
// c is where outbound packets are queued.
c chan PacketInfo
@@ -122,11 +123,7 @@ func (e *Endpoint) MTU() uint32 {
// Capabilities implements stack.LinkEndpoint.Capabilities.
func (e *Endpoint) Capabilities() stack.LinkEndpointCapabilities {
- caps := stack.LinkEndpointCapabilities(0)
- if e.GSO {
- caps |= stack.CapabilityHardwareGSO
- }
- return caps
+ return e.LinkEPCapabilities
}
// GSOMaxSize returns the maximum GSO packet size.
@@ -146,11 +143,16 @@ func (e *Endpoint) LinkAddress() tcpip.LinkAddress {
}
// WritePacket stores outbound packets into the channel.
-func (e *Endpoint) WritePacket(_ *stack.Route, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt tcpip.PacketBuffer) *tcpip.Error {
+func (e *Endpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt tcpip.PacketBuffer) *tcpip.Error {
+ // Clone r then release its resource so we only get the relevant fields from
+ // stack.Route without holding a reference to a NIC's endpoint.
+ route := r.Clone()
+ route.Release()
p := PacketInfo{
Pkt: pkt,
Proto: protocol,
GSO: gso,
+ Route: route,
}
select {
@@ -162,7 +164,11 @@ func (e *Endpoint) WritePacket(_ *stack.Route, gso *stack.GSO, protocol tcpip.Ne
}
// WritePackets stores outbound packets into the channel.
-func (e *Endpoint) WritePackets(_ *stack.Route, gso *stack.GSO, pkts []tcpip.PacketBuffer, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error) {
+func (e *Endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts []tcpip.PacketBuffer, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error) {
+ // Clone r then release its resource so we only get the relevant fields from
+ // stack.Route without holding a reference to a NIC's endpoint.
+ route := r.Clone()
+ route.Release()
payloadView := pkts[0].Data.ToView()
n := 0
packetLoop:
@@ -176,6 +182,7 @@ packetLoop:
},
Proto: protocol,
GSO: gso,
+ Route: route,
}
select {
diff --git a/pkg/tcpip/network/arp/arp.go b/pkg/tcpip/network/arp/arp.go
index 1ceaebfbd..4da13c5df 100644
--- a/pkg/tcpip/network/arp/arp.go
+++ b/pkg/tcpip/network/arp/arp.go
@@ -178,24 +178,9 @@ func (*protocol) ResolveStaticAddress(addr tcpip.Address) (tcpip.LinkAddress, bo
return broadcastMAC, true
}
if header.IsV4MulticastAddress(addr) {
- // RFC 1112 Host Extensions for IP Multicasting
- //
- // 6.4. Extensions to an Ethernet Local Network Module:
- //
- // An IP host group address is mapped to an Ethernet multicast
- // address by placing the low-order 23-bits of the IP address
- // into the low-order 23 bits of the Ethernet multicast address
- // 01-00-5E-00-00-00 (hex).
- return tcpip.LinkAddress([]byte{
- 0x01,
- 0x00,
- 0x5e,
- addr[header.IPv4AddressSize-3] & 0x7f,
- addr[header.IPv4AddressSize-2],
- addr[header.IPv4AddressSize-1],
- }), true
+ return header.EthernetAddressFromMulticastIPv4Address(addr), true
}
- return "", false
+ return tcpip.LinkAddress([]byte(nil)), false
}
// SetOption implements NetworkProtocol.
diff --git a/pkg/tcpip/network/ipv6/icmp.go b/pkg/tcpip/network/ipv6/icmp.go
index dc20c0fd7..60817d36d 100644
--- a/pkg/tcpip/network/ipv6/icmp.go
+++ b/pkg/tcpip/network/ipv6/icmp.go
@@ -408,10 +408,14 @@ func (*protocol) LinkAddressProtocol() tcpip.NetworkProtocolNumber {
// LinkAddressRequest implements stack.LinkAddressResolver.
func (*protocol) LinkAddressRequest(addr, localAddr tcpip.Address, linkEP stack.LinkEndpoint) *tcpip.Error {
snaddr := header.SolicitedNodeAddr(addr)
+
+ // TODO(b/148672031): Use stack.FindRoute instead of manually creating the
+ // route here. Note, we would need the nicID to do this properly so the right
+ // NIC (associated to linkEP) is used to send the NDP NS message.
r := &stack.Route{
LocalAddress: localAddr,
RemoteAddress: snaddr,
- RemoteLinkAddress: broadcastMAC,
+ RemoteLinkAddress: header.EthernetAddressFromMulticastIPv6Address(snaddr),
}
hdr := buffer.NewPrependable(int(linkEP.MaxHeaderLength()) + header.IPv6MinimumSize + header.ICMPv6NeighborAdvertSize)
pkt := header.ICMPv6(hdr.Prepend(header.ICMPv6NeighborAdvertSize))
@@ -441,23 +445,7 @@ func (*protocol) LinkAddressRequest(addr, localAddr tcpip.Address, linkEP stack.
// ResolveStaticAddress implements stack.LinkAddressResolver.
func (*protocol) ResolveStaticAddress(addr tcpip.Address) (tcpip.LinkAddress, bool) {
if header.IsV6MulticastAddress(addr) {
- // RFC 2464 Transmission of IPv6 Packets over Ethernet Networks
- //
- // 7. Address Mapping -- Multicast
- //
- // An IPv6 packet with a multicast destination address DST,
- // consisting of the sixteen octets DST[1] through DST[16], is
- // transmitted to the Ethernet multicast address whose first
- // two octets are the value 3333 hexadecimal and whose last
- // four octets are the last four octets of DST.
- return tcpip.LinkAddress([]byte{
- 0x33,
- 0x33,
- addr[header.IPv6AddressSize-4],
- addr[header.IPv6AddressSize-3],
- addr[header.IPv6AddressSize-2],
- addr[header.IPv6AddressSize-1],
- }), true
+ return header.EthernetAddressFromMulticastIPv6Address(addr), true
}
- return "", false
+ return tcpip.LinkAddress([]byte(nil)), false
}
diff --git a/pkg/tcpip/network/ipv6/icmp_test.go b/pkg/tcpip/network/ipv6/icmp_test.go
index 7a6820643..d0e930e20 100644
--- a/pkg/tcpip/network/ipv6/icmp_test.go
+++ b/pkg/tcpip/network/ipv6/icmp_test.go
@@ -270,8 +270,9 @@ func (c *testContext) cleanup() {
}
type routeArgs struct {
- src, dst *channel.Endpoint
- typ header.ICMPv6Type
+ src, dst *channel.Endpoint
+ typ header.ICMPv6Type
+ remoteLinkAddr tcpip.LinkAddress
}
func routeICMPv6Packet(t *testing.T, args routeArgs, fn func(*testing.T, header.ICMPv6)) {
@@ -292,6 +293,11 @@ func routeICMPv6Packet(t *testing.T, args routeArgs, fn func(*testing.T, header.
t.Errorf("unexpected protocol number %d", pi.Proto)
return
}
+
+ if len(args.remoteLinkAddr) != 0 && args.remoteLinkAddr != pi.Route.RemoteLinkAddress {
+ t.Errorf("got remote link address = %s, want = %s", pi.Route.RemoteLinkAddress, args.remoteLinkAddr)
+ }
+
ipv6 := header.IPv6(pi.Pkt.Header.View())
transProto := tcpip.TransportProtocolNumber(ipv6.NextHeader())
if transProto != header.ICMPv6ProtocolNumber {
@@ -339,7 +345,7 @@ func TestLinkResolution(t *testing.T) {
t.Fatalf("ep.Write(_) = _, <non-nil>, %s, want = _, <non-nil>, tcpip.ErrNoLinkAddress", err)
}
for _, args := range []routeArgs{
- {src: c.linkEP0, dst: c.linkEP1, typ: header.ICMPv6NeighborSolicit},
+ {src: c.linkEP0, dst: c.linkEP1, typ: header.ICMPv6NeighborSolicit, remoteLinkAddr: header.EthernetAddressFromMulticastIPv6Address(header.SolicitedNodeAddr(lladdr1))},
{src: c.linkEP1, dst: c.linkEP0, typ: header.ICMPv6NeighborAdvert},
} {
routeICMPv6Packet(t, args, func(t *testing.T, icmpv6 header.ICMPv6) {
diff --git a/pkg/tcpip/stack/ndp.go b/pkg/tcpip/stack/ndp.go
index 31294345d..6123fda33 100644
--- a/pkg/tcpip/stack/ndp.go
+++ b/pkg/tcpip/stack/ndp.go
@@ -538,6 +538,14 @@ func (ndp *ndpState) sendDADPacket(addr tcpip.Address) *tcpip.Error {
r := makeRoute(header.IPv6ProtocolNumber, header.IPv6Any, snmc, ndp.nic.linkEP.LinkAddress(), ref, false, false)
defer r.Release()
+ // Route should resolve immediately since snmc is a multicast address so a
+ // remote link address can be calculated without a resolution process.
+ if c, err := r.Resolve(nil); err != nil {
+ log.Fatalf("ndp: error when resolving route to send NDP NS for DAD (%s -> %s on NIC(%d)): %s", header.IPv6Any, snmc, ndp.nic.ID(), err)
+ } else if c != nil {
+ log.Fatalf("ndp: route resolution not immediate for route to send NDP NS for DAD (%s -> %s on NIC(%d))", header.IPv6Any, snmc, ndp.nic.ID())
+ }
+
hdr := buffer.NewPrependable(int(r.MaxHeaderLength()) + header.ICMPv6NeighborSolicitMinimumSize)
pkt := header.ICMPv6(hdr.Prepend(header.ICMPv6NeighborSolicitMinimumSize))
pkt.SetType(header.ICMPv6NeighborSolicit)
@@ -1197,6 +1205,15 @@ func (ndp *ndpState) startSolicitingRouters() {
r := makeRoute(header.IPv6ProtocolNumber, header.IPv6Any, header.IPv6AllRoutersMulticastAddress, ndp.nic.linkEP.LinkAddress(), ref, false, false)
defer r.Release()
+ // Route should resolve immediately since
+ // header.IPv6AllRoutersMulticastAddress is a multicast address so a
+ // remote link address can be calculated without a resolution process.
+ if c, err := r.Resolve(nil); err != nil {
+ log.Fatalf("ndp: error when resolving route to send NDP RS (%s -> %s on NIC(%d)): %s", header.IPv6Any, header.IPv6AllRoutersMulticastAddress, ndp.nic.ID(), err)
+ } else if c != nil {
+ log.Fatalf("ndp: route resolution not immediate for route to send NDP RS (%s -> %s on NIC(%d))", header.IPv6Any, header.IPv6AllRoutersMulticastAddress, ndp.nic.ID())
+ }
+
payloadSize := header.ICMPv6HeaderSize + header.NDPRSMinimumSize
hdr := buffer.NewPrependable(header.IPv6MinimumSize + payloadSize)
pkt := header.ICMPv6(hdr.Prepend(payloadSize))
diff --git a/pkg/tcpip/stack/ndp_test.go b/pkg/tcpip/stack/ndp_test.go
index bc7cfbcb4..8af8565f7 100644
--- a/pkg/tcpip/stack/ndp_test.go
+++ b/pkg/tcpip/stack/ndp_test.go
@@ -336,6 +336,7 @@ func TestDADResolve(t *testing.T) {
opts.NDPConfigs.DupAddrDetectTransmits = test.dupAddrDetectTransmits
e := channel.New(int(test.dupAddrDetectTransmits), 1280, linkAddr1)
+ e.LinkEPCapabilities |= stack.CapabilityResolutionRequired
s := stack.New(opts)
if err := s.CreateNIC(nicID, e); err != nil {
t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
@@ -413,6 +414,12 @@ func TestDADResolve(t *testing.T) {
t.Fatalf("got Proto = %d, want = %d", p.Proto, header.IPv6ProtocolNumber)
}
+ // Make sure the right remote link address is used.
+ snmc := header.SolicitedNodeAddr(addr1)
+ if want := header.EthernetAddressFromMulticastIPv6Address(snmc); p.Route.RemoteLinkAddress != want {
+ t.Errorf("got remote link address = %s, want = %s", p.Route.RemoteLinkAddress, want)
+ }
+
// Check NDP NS packet.
//
// As per RFC 4861 section 4.3, a possible option is the Source Link
@@ -420,7 +427,7 @@ func TestDADResolve(t *testing.T) {
// address of the packet is the unspecified address.
checker.IPv6(t, p.Pkt.Header.View().ToVectorisedView().First(),
checker.SrcAddr(header.IPv6Any),
- checker.DstAddr(header.SolicitedNodeAddr(addr1)),
+ checker.DstAddr(snmc),
checker.TTL(header.NDPHopLimit),
checker.NDPNS(
checker.NDPNSTargetAddress(addr1),
@@ -3292,6 +3299,7 @@ func TestRouterSolicitation(t *testing.T) {
t.Run(test.name, func(t *testing.T) {
t.Parallel()
e := channel.New(int(test.maxRtrSolicit), 1280, linkAddr1)
+ e.LinkEPCapabilities |= stack.CapabilityResolutionRequired
waitForPkt := func(timeout time.Duration) {
t.Helper()
ctx, _ := context.WithTimeout(context.Background(), timeout)
@@ -3304,6 +3312,12 @@ func TestRouterSolicitation(t *testing.T) {
if p.Proto != header.IPv6ProtocolNumber {
t.Fatalf("got Proto = %d, want = %d", p.Proto, header.IPv6ProtocolNumber)
}
+
+ // Make sure the right remote link address is used.
+ if want := header.EthernetAddressFromMulticastIPv6Address(header.IPv6AllRoutersMulticastAddress); p.Route.RemoteLinkAddress != want {
+ t.Errorf("got remote link address = %s, want = %s", p.Route.RemoteLinkAddress, want)
+ }
+
checker.IPv6(t,
p.Pkt.Header.View(),
checker.SrcAddr(header.IPv6Any),
diff --git a/pkg/tcpip/stack/route.go b/pkg/tcpip/stack/route.go
index 517f4b941..f565aafb2 100644
--- a/pkg/tcpip/stack/route.go
+++ b/pkg/tcpip/stack/route.go
@@ -225,7 +225,9 @@ func (r *Route) Release() {
// Clone Clone a route such that the original one can be released and the new
// one will remain valid.
func (r *Route) Clone() Route {
- r.ref.incRef()
+ if r.ref != nil {
+ r.ref.incRef()
+ }
return *r
}
diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go
index 7057b110e..b793f1d74 100644
--- a/pkg/tcpip/stack/stack.go
+++ b/pkg/tcpip/stack/stack.go
@@ -795,6 +795,8 @@ func (s *Stack) Forwarding() bool {
// SetRouteTable assigns the route table to be used by this stack. It
// specifies which NIC to use for given destination address ranges.
+//
+// This method takes ownership of the table.
func (s *Stack) SetRouteTable(table []tcpip.Route) {
s.mu.Lock()
defer s.mu.Unlock()
@@ -809,6 +811,13 @@ func (s *Stack) GetRouteTable() []tcpip.Route {
return append([]tcpip.Route(nil), s.routeTable...)
}
+// AddRoute appends a route to the route table.
+func (s *Stack) AddRoute(route tcpip.Route) {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ s.routeTable = append(s.routeTable, route)
+}
+
// NewEndpoint creates a new transport layer endpoint of the given protocol.
func (s *Stack) NewEndpoint(transport tcpip.TransportProtocolNumber, network tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) {
t, ok := s.transportProtocols[transport]
diff --git a/pkg/tcpip/transport/tcp/accept.go b/pkg/tcpip/transport/tcp/accept.go
index 6101f2945..08afb7c17 100644
--- a/pkg/tcpip/transport/tcp/accept.go
+++ b/pkg/tcpip/transport/tcp/accept.go
@@ -271,8 +271,8 @@ func (l *listenContext) createConnectingEndpoint(s *segment, iss seqnum.Value, i
return n, nil
}
-// createEndpoint creates a new endpoint in connected state and then performs
-// the TCP 3-way handshake.
+// createEndpointAndPerformHandshake creates a new endpoint in connected state
+// and then performs the TCP 3-way handshake.
func (l *listenContext) createEndpointAndPerformHandshake(s *segment, opts *header.TCPSynOptions, queue *waiter.Queue) (*endpoint, *tcpip.Error) {
// Create new endpoint.
irs := s.sequenceNumber
diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go
index 9ff7ac261..5c5397823 100644
--- a/pkg/tcpip/transport/tcp/connect.go
+++ b/pkg/tcpip/transport/tcp/connect.go
@@ -989,6 +989,10 @@ func (e *endpoint) transitionToStateCloseLocked() {
// to any other listening endpoint. We reply with RST if we cannot find one.
func (e *endpoint) tryDeliverSegmentFromClosedEndpoint(s *segment) {
ep := e.stack.FindTransportEndpoint(e.NetProto, e.TransProto, e.ID, &s.route)
+ if ep == nil && e.NetProto == header.IPv6ProtocolNumber && e.EndpointInfo.TransportEndpointInfo.ID.LocalAddress.To4() != "" {
+ // Dual-stack socket, try IPv4.
+ ep = e.stack.FindTransportEndpoint(header.IPv4ProtocolNumber, e.TransProto, e.ID, &s.route)
+ }
if ep == nil {
replyWithReset(s)
s.decRef()
diff --git a/pkg/tcpip/transport/tcp/testing/context/context.go b/pkg/tcpip/transport/tcp/testing/context/context.go
index 730ac4292..1e9a0dea3 100644
--- a/pkg/tcpip/transport/tcp/testing/context/context.go
+++ b/pkg/tcpip/transport/tcp/testing/context/context.go
@@ -1082,7 +1082,11 @@ func (c *Context) SACKEnabled() bool {
// SetGSOEnabled enables or disables generic segmentation offload.
func (c *Context) SetGSOEnabled(enable bool) {
- c.linkEP.GSO = enable
+ if enable {
+ c.linkEP.LinkEPCapabilities |= stack.CapabilityHardwareGSO
+ } else {
+ c.linkEP.LinkEPCapabilities &^= stack.CapabilityHardwareGSO
+ }
}
// MSSWithoutOptions returns the value for the MSS used by the stack when no
diff --git a/runsc/BUILD b/runsc/BUILD
index b35b41d81..375241921 100644
--- a/runsc/BUILD
+++ b/runsc/BUILD
@@ -117,4 +117,5 @@ sh_test(
srcs = ["version_test.sh"],
args = ["$(location :runsc)"],
data = [":runsc"],
+ tags = ["noguitar"],
)
diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go
index 4fb9adca6..f8d351c7b 100644
--- a/runsc/boot/filter/config.go
+++ b/runsc/boot/filter/config.go
@@ -174,6 +174,18 @@ var allowedSyscalls = seccomp.SyscallRules{
syscall.SYS_LSEEK: {},
syscall.SYS_MADVISE: {},
syscall.SYS_MINCORE: {},
+ // Used by the Go runtime as a temporarily workaround for a Linux
+ // 5.2-5.4 bug.
+ //
+ // See src/runtime/os_linux_x86.go.
+ //
+ // TODO(b/148688965): Remove once this is gone from Go.
+ syscall.SYS_MLOCK: []seccomp.Rule{
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(4096),
+ },
+ },
syscall.SYS_MMAP: []seccomp.Rule{
{
seccomp.AllowAny{},
diff --git a/runsc/container/BUILD b/runsc/container/BUILD
index e21431e4c..0aaeea3a8 100644
--- a/runsc/container/BUILD
+++ b/runsc/container/BUILD
@@ -30,7 +30,7 @@ go_library(
go_test(
name = "container_test",
- size = "medium",
+ size = "large",
srcs = [
"console_test.go",
"container_test.go",
diff --git a/runsc/fsgofer/filter/config.go b/runsc/fsgofer/filter/config.go
index a1792330f..1dce36965 100644
--- a/runsc/fsgofer/filter/config.go
+++ b/runsc/fsgofer/filter/config.go
@@ -128,6 +128,18 @@ var allowedSyscalls = seccomp.SyscallRules{
syscall.SYS_MADVISE: {},
unix.SYS_MEMFD_CREATE: {}, /// Used by flipcall.PacketWindowAllocator.Init().
syscall.SYS_MKDIRAT: {},
+ // Used by the Go runtime as a temporarily workaround for a Linux
+ // 5.2-5.4 bug.
+ //
+ // See src/runtime/os_linux_x86.go.
+ //
+ // TODO(b/148688965): Remove once this is gone from Go.
+ syscall.SYS_MLOCK: []seccomp.Rule{
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(4096),
+ },
+ },
syscall.SYS_MMAP: []seccomp.Rule{
{
seccomp.AllowAny{},
diff --git a/runsc/testutil/testutil.go b/runsc/testutil/testutil.go
index fb22eae39..edf2e809a 100644
--- a/runsc/testutil/testutil.go
+++ b/runsc/testutil/testutil.go
@@ -136,8 +136,13 @@ func FindFile(path string) (string, error) {
// TestConfig returns the default configuration to use in tests. Note that
// 'RootDir' must be set by caller if required.
func TestConfig() *boot.Config {
+ logDir := ""
+ if dir, ok := os.LookupEnv("TEST_UNDECLARED_OUTPUTS_DIR"); ok {
+ logDir = dir + "/"
+ }
return &boot.Config{
Debug: true,
+ DebugLog: logDir,
LogFormat: "text",
DebugLogFormat: "text",
AlsoLogToStderr: true,
@@ -434,43 +439,40 @@ func IsStatic(filename string) (bool, error) {
return true, nil
}
-// TestBoundsForShard calculates the beginning and end indices for the test
-// based on the TEST_SHARD_INDEX and TEST_TOTAL_SHARDS environment vars. The
-// returned ints are the beginning (inclusive) and end (exclusive) of the
-// subslice corresponding to the shard. If either of the env vars are not
-// present, then the function will return bounds that include all tests. If
-// there are more shards than there are tests, then the returned list may be
-// empty.
-func TestBoundsForShard(numTests int) (int, int, error) {
+// TestIndicesForShard returns indices for this test shard based on the
+// TEST_SHARD_INDEX and TEST_TOTAL_SHARDS environment vars.
+//
+// If either of the env vars are not present, then the function will return all
+// tests. If there are more shards than there are tests, then the returned list
+// may be empty.
+func TestIndicesForShard(numTests int) ([]int, error) {
var (
- begin = 0
- end = numTests
+ shardIndex = 0
+ shardTotal = 1
)
- indexStr, totalStr := os.Getenv("TEST_SHARD_INDEX"), os.Getenv("TEST_TOTAL_SHARDS")
- if indexStr == "" || totalStr == "" {
- return begin, end, nil
- }
- // Parse index and total to ints.
- shardIndex, err := strconv.Atoi(indexStr)
- if err != nil {
- return 0, 0, fmt.Errorf("invalid TEST_SHARD_INDEX %q: %v", indexStr, err)
- }
- shardTotal, err := strconv.Atoi(totalStr)
- if err != nil {
- return 0, 0, fmt.Errorf("invalid TEST_TOTAL_SHARDS %q: %v", totalStr, err)
+ indexStr, totalStr := os.Getenv("TEST_SHARD_INDEX"), os.Getenv("TEST_TOTAL_SHARDS")
+ if indexStr != "" && totalStr != "" {
+ // Parse index and total to ints.
+ var err error
+ shardIndex, err = strconv.Atoi(indexStr)
+ if err != nil {
+ return nil, fmt.Errorf("invalid TEST_SHARD_INDEX %q: %v", indexStr, err)
+ }
+ shardTotal, err = strconv.Atoi(totalStr)
+ if err != nil {
+ return nil, fmt.Errorf("invalid TEST_TOTAL_SHARDS %q: %v", totalStr, err)
+ }
}
// Calculate!
- shardSize := int(math.Ceil(float64(numTests) / float64(shardTotal)))
- begin = shardIndex * shardSize
- end = ((shardIndex + 1) * shardSize)
- if begin > numTests {
- // Nothing to run.
- return 0, 0, nil
- }
- if end > numTests {
- end = numTests
+ var indices []int
+ numBlocks := int(math.Ceil(float64(numTests) / float64(shardTotal)))
+ for i := 0; i < numBlocks; i++ {
+ pick := i*shardTotal + shardIndex
+ if pick < numTests {
+ indices = append(indices, pick)
+ }
}
- return begin, end, nil
+ return indices, nil
}
diff --git a/scripts/common_build.sh b/scripts/common_build.sh
index a473a88a4..2c2a826c7 100755
--- a/scripts/common_build.sh
+++ b/scripts/common_build.sh
@@ -32,21 +32,21 @@ declare -r BAZEL_FLAGS=(
"--keep_going"
"--verbose_failures=true"
)
+BAZEL_RBE_AUTH_FLAGS=""
+BAZEL_RBE_FLAGS=""
if [[ -v KOKORO_BAZEL_AUTH_CREDENTIAL ]]; then
- declare -r BAZEL_RBE_AUTH_FLAGS=(
- "--auth_credentials=${KOKORO_BAZEL_AUTH_CREDENTIAL}"
- )
- declare -r BAZEL_RBE_FLAGS=("--config=remote")
+ declare -r BAZEL_RBE_AUTH_FLAGS="--auth_credentials=${KOKORO_BAZEL_AUTH_CREDENTIAL}"
+ declare -r BAZEL_RBE_FLAGS="--config=remote"
fi
# Wrap bazel.
function build() {
- bazel build "${BAZEL_RBE_FLAGS[@]}" "${BAZEL_RBE_AUTH_FLAGS[@]}" "${BAZEL_FLAGS[@]}" "$@" 2>&1 |
+ bazel build "${BAZEL_RBE_FLAGS}" "${BAZEL_RBE_AUTH_FLAGS}" "${BAZEL_FLAGS[@]}" "$@" 2>&1 |
tee /dev/fd/2 | grep -E '^ bazel-bin/' | awk '{ print $1; }'
}
function test() {
- bazel test "${BAZEL_RBE_FLAGS[@]}" "${BAZEL_RBE_AUTH_FLAGS[@]}" "${BAZEL_FLAGS[@]}" "$@"
+ bazel test "${BAZEL_RBE_FLAGS}" "${BAZEL_RBE_AUTH_FLAGS}" "${BAZEL_FLAGS[@]}" "$@"
}
function run() {
diff --git a/scripts/packetdrill_tests.sh b/scripts/packetdrill_tests.sh
new file mode 100755
index 000000000..fc6bef79c
--- /dev/null
+++ b/scripts/packetdrill_tests.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+# Copyright 2019 The gVisor Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+source $(dirname $0)/common.sh
+
+install_runsc_for_test runsc-d
+test_runsc $(bazel query "attr(tags, manual, tests(//test/packetdrill/...))")
diff --git a/test/iptables/filter_input.go b/test/iptables/filter_input.go
index e9f0978eb..bd6059921 100644
--- a/test/iptables/filter_input.go
+++ b/test/iptables/filter_input.go
@@ -256,10 +256,12 @@ func (FilterInputDropAll) LocalAction(ip net.IP) error {
// misunderstand and save the wrong tables.
type FilterInputMultiUDPRules struct{}
+// Name implements TestCase.Name.
func (FilterInputMultiUDPRules) Name() string {
return "FilterInputMultiUDPRules"
}
+// ContainerAction implements TestCase.ContainerAction.
func (FilterInputMultiUDPRules) ContainerAction(ip net.IP) error {
if err := filterTable("-A", "INPUT", "-p", "udp", "-m", "udp", "--destination-port", fmt.Sprintf("%d", dropPort), "-j", "DROP"); err != nil {
return err
@@ -270,6 +272,7 @@ func (FilterInputMultiUDPRules) ContainerAction(ip net.IP) error {
return filterTable("-L")
}
+// LocalAction implements TestCase.LocalAction.
func (FilterInputMultiUDPRules) LocalAction(ip net.IP) error {
// No-op.
return nil
@@ -279,10 +282,12 @@ func (FilterInputMultiUDPRules) LocalAction(ip net.IP) error {
// specified.
type FilterInputRequireProtocolUDP struct{}
+// Name implements TestCase.Name.
func (FilterInputRequireProtocolUDP) Name() string {
return "FilterInputRequireProtocolUDP"
}
+// ContainerAction implements TestCase.ContainerAction.
func (FilterInputRequireProtocolUDP) ContainerAction(ip net.IP) error {
if err := filterTable("-A", "INPUT", "-m", "udp", "--destination-port", fmt.Sprintf("%d", dropPort), "-j", "DROP"); err == nil {
return errors.New("expected iptables to fail with out \"-p udp\", but succeeded")
@@ -290,6 +295,7 @@ func (FilterInputRequireProtocolUDP) ContainerAction(ip net.IP) error {
return nil
}
+// LocalAction implements TestCase.LocalAction.
func (FilterInputRequireProtocolUDP) LocalAction(ip net.IP) error {
// No-op.
return nil
diff --git a/test/packetdrill/BUILD b/test/packetdrill/BUILD
new file mode 100644
index 000000000..d113555b1
--- /dev/null
+++ b/test/packetdrill/BUILD
@@ -0,0 +1,8 @@
+load("defs.bzl", "packetdrill_test")
+
+package(licenses = ["notice"])
+
+packetdrill_test(
+ name = "fin_wait2_timeout",
+ scripts = ["fin_wait2_timeout.pkt"],
+)
diff --git a/test/packetdrill/Dockerfile b/test/packetdrill/Dockerfile
new file mode 100644
index 000000000..bd4451355
--- /dev/null
+++ b/test/packetdrill/Dockerfile
@@ -0,0 +1,9 @@
+FROM ubuntu:bionic
+
+RUN apt-get update
+RUN apt-get install -y net-tools git iptables iputils-ping netcat tcpdump jq tar
+RUN hash -r
+RUN git clone --branch packetdrill-v2.0 \
+ https://github.com/google/packetdrill.git
+RUN cd packetdrill/gtests/net/packetdrill && ./configure && \
+ apt-get install -y bison flex make && make
diff --git a/test/packetdrill/defs.bzl b/test/packetdrill/defs.bzl
new file mode 100644
index 000000000..8623ce7b1
--- /dev/null
+++ b/test/packetdrill/defs.bzl
@@ -0,0 +1,87 @@
+"""Defines a rule for packetdrill test targets."""
+
+def _packetdrill_test_impl(ctx):
+ test_runner = ctx.executable._test_runner
+ runner = ctx.actions.declare_file("%s-runner" % ctx.label.name)
+
+ script_paths = []
+ for script in ctx.files.scripts:
+ script_paths.append(script.short_path)
+ runner_content = "\n".join([
+ "#!/bin/bash",
+ # This test will run part in a distinct user namespace. This can cause
+ # permission problems, because all runfiles may not be owned by the
+ # current user, and no other users will be mapped in that namespace.
+ # Make sure that everything is readable here.
+ "find . -type f -exec chmod a+rx {} \\;",
+ "find . -type d -exec chmod a+rx {} \\;",
+ "%s %s --init_script %s $@ -- %s\n" % (
+ test_runner.short_path,
+ " ".join(ctx.attr.flags),
+ ctx.files._init_script[0].short_path,
+ " ".join(script_paths),
+ ),
+ ])
+ ctx.actions.write(runner, runner_content, is_executable = True)
+
+ transitive_files = depset()
+ if hasattr(ctx.attr._test_runner, "data_runfiles"):
+ transitive_files = depset(ctx.attr._test_runner.data_runfiles.files)
+ runfiles = ctx.runfiles(
+ files = [test_runner] + ctx.files._init_script + ctx.files.scripts,
+ transitive_files = transitive_files,
+ collect_default = True,
+ collect_data = True,
+ )
+ return [DefaultInfo(executable = runner, runfiles = runfiles)]
+
+_packetdrill_test = rule(
+ attrs = {
+ "_test_runner": attr.label(
+ executable = True,
+ cfg = "host",
+ allow_files = True,
+ default = "packetdrill_test.sh",
+ ),
+ "_init_script": attr.label(
+ allow_single_file = True,
+ default = "packetdrill_setup.sh",
+ ),
+ "flags": attr.string_list(
+ mandatory = False,
+ default = [],
+ ),
+ "scripts": attr.label_list(
+ mandatory = True,
+ allow_files = True,
+ ),
+ },
+ test = True,
+ implementation = _packetdrill_test_impl,
+)
+
+_PACKETDRILL_TAGS = ["local", "manual"]
+
+def packetdrill_linux_test(name, **kwargs):
+ if "tags" not in kwargs:
+ kwargs["tags"] = _PACKETDRILL_TAGS
+ _packetdrill_test(
+ name = name + "_linux_test",
+ flags = ["--dut_platform", "linux"],
+ **kwargs
+ )
+
+def packetdrill_netstack_test(name, **kwargs):
+ if "tags" not in kwargs:
+ kwargs["tags"] = _PACKETDRILL_TAGS
+ _packetdrill_test(
+ name = name + "_netstack_test",
+ # This is the default runtime unless
+ # "--test_arg=--runtime=OTHER_RUNTIME" is used to override the value.
+ flags = ["--dut_platform", "netstack", "--runtime", "runsc-d"],
+ **kwargs
+ )
+
+def packetdrill_test(**kwargs):
+ packetdrill_linux_test(**kwargs)
+ packetdrill_netstack_test(**kwargs)
diff --git a/test/packetdrill/fin_wait2_timeout.pkt b/test/packetdrill/fin_wait2_timeout.pkt
new file mode 100644
index 000000000..613f0bec9
--- /dev/null
+++ b/test/packetdrill/fin_wait2_timeout.pkt
@@ -0,0 +1,23 @@
+// Test that a socket in FIN_WAIT_2 eventually times out and a subsequent
+// packet generates a RST.
+
+0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 bind(3, ..., ...) = 0
+
++0 listen(3, 1) = 0
+
+// Establish a connection without timestamps.
++0 < S 0:0(0) win 32792 <mss 1460,sackOK,nop,nop,nop,wscale 7>
++0 > S. 0:0(0) ack 1 <...>
++0 < P. 1:1(0) ack 1 win 257
+
++0.100 accept(3, ..., ...) = 4
+// set FIN_WAIT2 timeout to 1 seconds.
++0.100 setsockopt(4, SOL_TCP, TCP_LINGER2, [1], 4) = 0
++0 close(4) = 0
+
++0 > F. 1:1(0) ack 1 <...>
++0 < . 1:1(0) ack 2 win 257
+
++1.1 < . 1:1(0) ack 2 win 257
++0 > R 2:2(0) win 0
diff --git a/test/packetdrill/packetdrill_setup.sh b/test/packetdrill/packetdrill_setup.sh
new file mode 100755
index 000000000..b858072f0
--- /dev/null
+++ b/test/packetdrill/packetdrill_setup.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+# Copyright 2018 The gVisor Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This script runs both within the sentry context and natively. It should tweak
+# TCP parameters to match expectations found in the script files.
+sysctl -q net.ipv4.tcp_sack=1
+sysctl -q net.ipv4.tcp_rmem="4096 2097152 $((8*1024*1024))"
+sysctl -q net.ipv4.tcp_wmem="4096 2097152 $((8*1024*1024))"
+
+# There may be errors from the above, but they will show up in the test logs and
+# we always want to proceed from this point. It's possible that values were
+# already set correctly and the nodes were not available in the namespace.
+exit 0
diff --git a/test/packetdrill/packetdrill_test.sh b/test/packetdrill/packetdrill_test.sh
new file mode 100755
index 000000000..0b22dfd5c
--- /dev/null
+++ b/test/packetdrill/packetdrill_test.sh
@@ -0,0 +1,225 @@
+#!/bin/bash
+
+# Copyright 2020 The gVisor Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Run a packetdrill test. Two docker containers are made, one for the
+# Device-Under-Test (DUT) and one for the test runner. Each is attached with
+# two networks, one for control packets that aid the test and one for test
+# packets which are sent as part of the test and observed for correctness.
+
+set -euxo pipefail
+
+function failure() {
+ local lineno=$1
+ local msg=$2
+ local filename="$0"
+ echo "FAIL: $filename:$lineno: $msg"
+}
+trap 'failure ${LINENO} "$BASH_COMMAND"' ERR
+
+declare -r LONGOPTS="dut_platform:,init_script:,runtime:"
+
+# Don't use declare below so that the error from getopt will end the script.
+PARSED=$(getopt --options "" --longoptions=$LONGOPTS --name "$0" -- "$@")
+
+eval set -- "$PARSED"
+
+while true; do
+ case "$1" in
+ --dut_platform)
+ # Either "linux" or "netstack".
+ declare -r DUT_PLATFORM="$2"
+ shift 2
+ ;;
+ --init_script)
+ declare -r INIT_SCRIPT="$2"
+ shift 2
+ ;;
+ --runtime)
+ # Not readonly because there might be multiple --runtime arguments and we
+ # want to use just the last one. Only used if --dut_platform is
+ # "netstack".
+ declare RUNTIME="$2"
+ shift 2
+ ;;
+ --)
+ shift
+ break
+ ;;
+ *)
+ echo "Programming error"
+ exit 3
+ esac
+done
+
+# All the other arguments are scripts.
+declare -r scripts="$@"
+
+# Check that the required flags are defined in a way that is safe for "set -u".
+if [[ "${DUT_PLATFORM-}" == "netstack" ]]; then
+ if [[ -z "${RUNTIME-}" ]]; then
+ echo "FAIL: Missing --runtime argument: ${RUNTIME-}"
+ exit 2
+ fi
+ declare -r RUNTIME_ARG="--runtime ${RUNTIME}"
+elif [[ "${DUT_PLATFORM-}" == "linux" ]]; then
+ declare -r RUNTIME_ARG=""
+else
+ echo "FAIL: Bad or missing --dut_platform argument: ${DUT_PLATFORM-}"
+ exit 2
+fi
+if [[ ! -x "${INIT_SCRIPT-}" ]]; then
+ echo "FAIL: Bad or missing --init_script: ${INIT_SCRIPT-}"
+ exit 2
+fi
+
+# Variables specific to the control network and interface start with CTRL_.
+# Variables specific to the test network and interface start with TEST_.
+# Variables specific to the DUT start with DUT_.
+# Variables specific to the test runner start with TEST_RUNNER_.
+declare -r PACKETDRILL="/packetdrill/gtests/net/packetdrill/packetdrill"
+# Use random numbers so that test networks don't collide.
+declare -r CTRL_NET="ctrl_net-${RANDOM}${RANDOM}"
+declare -r TEST_NET="test_net-${RANDOM}${RANDOM}"
+declare -r tolerance_usecs=100000
+# On both DUT and test runner, testing packets are on the eth2 interface.
+declare -r TEST_DEVICE="eth2"
+# Number of bits in the *_NET_PREFIX variables.
+declare -r NET_MASK="24"
+function new_net_prefix() {
+ # Class C, 192.0.0.0 to 223.255.255.255, transitionally has mask 24.
+ echo "$(shuf -i 192-223 -n 1).$(shuf -i 0-255 -n 1).$(shuf -i 0-255 -n 1)"
+}
+# Last bits of the DUT's IP address.
+declare -r DUT_NET_SUFFIX=".10"
+# Control port.
+declare -r CTRL_PORT="40000"
+# Last bits of the test runner's IP address.
+declare -r TEST_RUNNER_NET_SUFFIX=".20"
+declare -r TIMEOUT="60"
+declare -r IMAGE_TAG="gcr.io/gvisor-presubmit/packetdrill"
+
+# Make sure that docker is installed.
+docker --version
+
+function finish {
+ local cleanup_success=1
+ for net in "${CTRL_NET}" "${TEST_NET}"; do
+ # Kill all processes attached to ${net}.
+ for docker_command in "kill" "rm"; do
+ (docker network inspect "${net}" \
+ --format '{{range $key, $value := .Containers}}{{$key}} {{end}}' \
+ | xargs -r docker "${docker_command}") || \
+ cleanup_success=0
+ done
+ # Remove the network.
+ docker network rm "${net}" || \
+ cleanup_success=0
+ done
+
+ if ((!$cleanup_success)); then
+ echo "FAIL: Cleanup command failed"
+ exit 4
+ fi
+}
+trap finish EXIT
+
+# Subnet for control packets between test runner and DUT.
+declare CTRL_NET_PREFIX=$(new_net_prefix)
+while ! docker network create \
+ "--subnet=${CTRL_NET_PREFIX}.0/${NET_MASK}" "${CTRL_NET}"; do
+ sleep 0.1
+ declare CTRL_NET_PREFIX=$(new_net_prefix)
+done
+
+# Subnet for the packets that are part of the test.
+declare TEST_NET_PREFIX=$(new_net_prefix)
+while ! docker network create \
+ "--subnet=${TEST_NET_PREFIX}.0/${NET_MASK}" "${TEST_NET}"; do
+ sleep 0.1
+ declare TEST_NET_PREFIX=$(new_net_prefix)
+done
+
+docker pull "${IMAGE_TAG}"
+
+# Create the DUT container and connect to network.
+DUT=$(docker create ${RUNTIME_ARG} --privileged --rm \
+ --stop-timeout ${TIMEOUT} -it ${IMAGE_TAG})
+docker network connect "${CTRL_NET}" \
+ --ip "${CTRL_NET_PREFIX}${DUT_NET_SUFFIX}" "${DUT}" \
+ || (docker kill ${DUT}; docker rm ${DUT}; false)
+docker network connect "${TEST_NET}" \
+ --ip "${TEST_NET_PREFIX}${DUT_NET_SUFFIX}" "${DUT}" \
+ || (docker kill ${DUT}; docker rm ${DUT}; false)
+docker start "${DUT}"
+
+# Create the test runner container and connect to network.
+TEST_RUNNER=$(docker create --privileged --rm \
+ --stop-timeout ${TIMEOUT} -it ${IMAGE_TAG})
+docker network connect "${CTRL_NET}" \
+ --ip "${CTRL_NET_PREFIX}${TEST_RUNNER_NET_SUFFIX}" "${TEST_RUNNER}" \
+ || (docker kill ${TEST_RUNNER}; docker rm ${REST_RUNNER}; false)
+docker network connect "${TEST_NET}" \
+ --ip "${TEST_NET_PREFIX}${TEST_RUNNER_NET_SUFFIX}" "${TEST_RUNNER}" \
+ || (docker kill ${TEST_RUNNER}; docker rm ${REST_RUNNER}; false)
+docker start "${TEST_RUNNER}"
+
+# Run tcpdump in the test runner unbuffered, without dns resolution, just on the
+# interface with the test packets.
+docker exec -t ${TEST_RUNNER} tcpdump -U -n -i "${TEST_DEVICE}" &
+
+# Start a packetdrill server on the test_runner. The packetdrill server sends
+# packets and asserts that they are received.
+docker exec -d "${TEST_RUNNER}" \
+ ${PACKETDRILL} --wire_server --wire_server_dev="${TEST_DEVICE}" \
+ --wire_server_ip="${CTRL_NET_PREFIX}${TEST_RUNNER_NET_SUFFIX}" \
+ --wire_server_port="${CTRL_PORT}" \
+ --local_ip="${TEST_NET_PREFIX}${TEST_RUNNER_NET_SUFFIX}" \
+ --remote_ip="${TEST_NET_PREFIX}${DUT_NET_SUFFIX}"
+
+# Because the Linux kernel receives the SYN-ACK but didn't send the SYN it will
+# issue a RST. To prevent this IPtables can be used to filter those out.
+docker exec "${TEST_RUNNER}" \
+ iptables -A OUTPUT -p tcp --tcp-flags RST RST -j DROP
+
+# Wait for the packetdrill server on the test runner to come. Attempt to
+# connect to it from the DUT every 100 milliseconds until success.
+while ! docker exec "${DUT}" \
+ nc -zv "${CTRL_NET_PREFIX}${TEST_RUNNER_NET_SUFFIX}" "${CTRL_PORT}"; do
+ sleep 0.1
+done
+
+# Copy the packetdrill setup script to the DUT.
+docker cp -L "${INIT_SCRIPT}" "${DUT}:packetdrill_setup.sh"
+
+# Copy the packetdrill scripts to the DUT.
+declare -a dut_scripts
+for script in $scripts; do
+ docker cp -L "${script}" "${DUT}:$(basename ${script})"
+ dut_scripts+=("/$(basename ${script})")
+done
+
+# Start a packetdrill client on the DUT. The packetdrill client runs POSIX
+# socket commands and also sends instructions to the server.
+docker exec -t "${DUT}" \
+ ${PACKETDRILL} --wire_client --wire_client_dev="${TEST_DEVICE}" \
+ --wire_server_ip="${CTRL_NET_PREFIX}${TEST_RUNNER_NET_SUFFIX}" \
+ --wire_server_port="${CTRL_PORT}" \
+ --local_ip="${TEST_NET_PREFIX}${DUT_NET_SUFFIX}" \
+ --remote_ip="${TEST_NET_PREFIX}${TEST_RUNNER_NET_SUFFIX}" \
+ --init_scripts=/packetdrill_setup.sh \
+ --tolerance_usecs="${tolerance_usecs}" "${dut_scripts[@]}"
+
+echo PASS: No errors.
diff --git a/test/runtimes/runner.go b/test/runtimes/runner.go
index bec37c69d..ddb890dbc 100644
--- a/test/runtimes/runner.go
+++ b/test/runtimes/runner.go
@@ -20,7 +20,6 @@ import (
"flag"
"fmt"
"io"
- "log"
"os"
"sort"
"strings"
@@ -101,17 +100,15 @@ func getTests(d dockerutil.Docker, blacklist map[string]struct{}) ([]testing.Int
// shard.
tests := strings.Fields(list)
sort.Strings(tests)
- begin, end, err := testutil.TestBoundsForShard(len(tests))
+ indices, err := testutil.TestIndicesForShard(len(tests))
if err != nil {
return nil, fmt.Errorf("TestsForShard() failed: %v", err)
}
- log.Printf("Got bounds [%d:%d) for shard out of %d total tests", begin, end, len(tests))
- tests = tests[begin:end]
var itests []testing.InternalTest
- for _, tc := range tests {
+ for _, tci := range indices {
// Capture tc in this scope.
- tc := tc
+ tc := tests[tci]
itests = append(itests, testing.InternalTest{
Name: tc,
F: func(t *testing.T) {
diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD
index 40e974314..31d239c0e 100644
--- a/test/syscalls/BUILD
+++ b/test/syscalls/BUILD
@@ -46,6 +46,15 @@ syscall_test(test = "//test/syscalls/linux:brk_test")
syscall_test(test = "//test/syscalls/linux:socket_test")
syscall_test(
+ size = "large",
+ shard_count = 50,
+ # Takes too long for TSAN. Since this is kind of a stress test that doesn't
+ # involve much concurrency, TSAN's usefulness here is limited anyway.
+ tags = ["nogotsan"],
+ test = "//test/syscalls/linux:socket_stress_test",
+)
+
+syscall_test(
add_overlay = True,
test = "//test/syscalls/linux:chdir_test",
)
@@ -225,7 +234,6 @@ syscall_test(
syscall_test(
add_overlay = True,
test = "//test/syscalls/linux:mknod_test",
- use_tmpfs = True, # mknod is not supported over gofer.
)
syscall_test(
diff --git a/test/syscalls/linux/32bit.cc b/test/syscalls/linux/32bit.cc
index 2751fb4e7..9883aef61 100644
--- a/test/syscalls/linux/32bit.cc
+++ b/test/syscalls/linux/32bit.cc
@@ -102,7 +102,8 @@ TEST(Syscall32Bit, Int80) {
}
TEST(Syscall32Bit, Sysenter) {
- if (PlatformSupport32Bit() == PlatformSupport::Allowed &&
+ if ((PlatformSupport32Bit() == PlatformSupport::Allowed ||
+ PlatformSupport32Bit() == PlatformSupport::Ignored) &&
GetCPUVendor() == CPUVendor::kAMD) {
// SYSENTER is an illegal instruction in compatibility mode on AMD.
EXPECT_EXIT(ExitGroup32(kSysenter, kExitCode),
@@ -133,7 +134,8 @@ TEST(Syscall32Bit, Sysenter) {
}
TEST(Syscall32Bit, Syscall) {
- if (PlatformSupport32Bit() == PlatformSupport::Allowed &&
+ if ((PlatformSupport32Bit() == PlatformSupport::Allowed ||
+ PlatformSupport32Bit() == PlatformSupport::Ignored) &&
GetCPUVendor() == CPUVendor::kIntel) {
// SYSCALL is an illegal instruction in compatibility mode on Intel.
EXPECT_EXIT(ExitGroup32(kSyscall, kExitCode),
diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD
index 6f57c9755..f2e3c7072 100644
--- a/test/syscalls/linux/BUILD
+++ b/test/syscalls/linux/BUILD
@@ -1,4 +1,4 @@
-load("//tools:defs.bzl", "cc_binary", "cc_library", "default_net_util", "select_system")
+load("//tools:defs.bzl", "cc_binary", "cc_library", "default_net_util", "gtest", "select_arch", "select_system")
package(
default_visibility = ["//:sandbox"],
@@ -11,6 +11,7 @@ exports_files(
"socket_inet_loopback.cc",
"socket_ip_loopback_blocking.cc",
"socket_ip_tcp_loopback.cc",
+ "socket_ip_udp_loopback.cc",
"socket_ipv4_udp_unbound_loopback.cc",
"tcp_socket.cc",
"udp_socket.cc",
@@ -82,14 +83,14 @@ cc_library(
srcs = ["base_poll_test.cc"],
hdrs = ["base_poll_test.h"],
deps = [
+ "@com_google_absl//absl/memory",
+ "@com_google_absl//absl/synchronization",
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:logging",
"//test/util:signal_util",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_absl//absl/memory",
- "@com_google_absl//absl/synchronization",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
)
@@ -99,11 +100,11 @@ cc_library(
hdrs = ["file_base.h"],
deps = [
"//test/util:file_descriptor",
+ "@com_google_absl//absl/strings",
+ gtest,
"//test/util:posix_error",
"//test/util:temp_path",
"//test/util:test_util",
- "@com_google_absl//absl/strings",
- "@com_google_googletest//:gtest",
],
)
@@ -130,11 +131,12 @@ cc_library(
hdrs = ["socket_test_util.h"],
defines = select_system(),
deps = default_net_util() + [
- "@com_google_googletest//:gtest",
+ gtest,
"@com_google_absl//absl/memory",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/strings:str_format",
"@com_google_absl//absl/time",
+ "@com_google_absl//absl/types:optional",
"//test/util:file_descriptor",
"//test/util:posix_error",
"//test/util:temp_path",
@@ -155,9 +157,9 @@ cc_library(
hdrs = ["unix_domain_socket_test_util.h"],
deps = [
":socket_test_util",
- "//test/util:test_util",
"@com_google_absl//absl/strings",
- "@com_google_googletest//:gtest",
+ gtest,
+ "//test/util:test_util",
],
)
@@ -179,30 +181,33 @@ cc_binary(
linkstatic = 1,
deps = [
"//test/util:cleanup",
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:posix_error",
"//test/util:signal_util",
"//test/util:test_main",
"//test/util:test_util",
"//test/util:thread_util",
"//test/util:timer_util",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
)
cc_binary(
name = "32bit_test",
testonly = 1,
- srcs = ["32bit.cc"],
+ srcs = select_arch(
+ amd64 = ["32bit.cc"],
+ arm64 = [],
+ ),
linkstatic = 1,
deps = [
+ "@com_google_absl//absl/base:core_headers",
+ gtest,
"//test/util:memory_util",
"//test/util:platform_util",
"//test/util:posix_error",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_absl//absl/base:core_headers",
- "@com_google_googletest//:gtest",
],
)
@@ -215,9 +220,9 @@ cc_binary(
":socket_test_util",
":unix_domain_socket_test_util",
"//test/util:file_descriptor",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -230,9 +235,9 @@ cc_binary(
":socket_test_util",
":unix_domain_socket_test_util",
"//test/util:file_descriptor",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -244,10 +249,10 @@ cc_binary(
deps = [
"//test/util:capability_util",
"//test/util:fs_util",
+ gtest,
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -259,12 +264,12 @@ cc_binary(
deps = [
"//test/util:cleanup",
"//test/util:fs_util",
+ "@com_google_absl//absl/strings",
+ gtest,
"//test/util:posix_error",
"//test/util:test_main",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_absl//absl/strings",
- "@com_google_googletest//:gtest",
],
)
@@ -277,12 +282,11 @@ cc_binary(
],
linkstatic = 1,
deps = [
- # The heapchecker doesn't recognize that io_destroy munmaps.
- "@com_google_googletest//:gtest",
- "@com_google_absl//absl/strings",
"//test/util:cleanup",
"//test/util:file_descriptor",
"//test/util:fs_util",
+ "@com_google_absl//absl/strings",
+ gtest,
"//test/util:memory_util",
"//test/util:posix_error",
"//test/util:proc_util",
@@ -299,12 +303,12 @@ cc_binary(
linkstatic = 1,
deps = [
"//test/util:file_descriptor",
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:logging",
"//test/util:signal_util",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
)
@@ -317,9 +321,9 @@ cc_binary(
"//:sandbox",
],
deps = [
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -331,9 +335,9 @@ cc_binary(
deps = [
":socket_test_util",
":unix_domain_socket_test_util",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -345,9 +349,9 @@ cc_binary(
deps = [
":socket_test_util",
"//test/util:file_descriptor",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -369,10 +373,10 @@ cc_binary(
linkstatic = 1,
deps = [
"//test/util:capability_util",
+ gtest,
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -385,10 +389,10 @@ cc_binary(
"//test/util:capability_util",
"//test/util:file_descriptor",
"//test/util:fs_util",
+ gtest,
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -401,14 +405,14 @@ cc_binary(
"//test/util:capability_util",
"//test/util:file_descriptor",
"//test/util:fs_util",
+ "@com_google_absl//absl/flags:flag",
+ "@com_google_absl//absl/synchronization",
+ gtest,
"//test/util:posix_error",
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_absl//absl/flags:flag",
- "@com_google_absl//absl/synchronization",
- "@com_google_googletest//:gtest",
],
)
@@ -421,12 +425,12 @@ cc_binary(
"//test/util:capability_util",
"//test/util:file_descriptor",
"//test/util:fs_util",
+ "@com_google_absl//absl/flags:flag",
+ gtest,
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_absl//absl/flags:flag",
- "@com_google_googletest//:gtest",
],
)
@@ -440,12 +444,12 @@ cc_binary(
"//test/util:cleanup",
"//test/util:file_descriptor",
"//test/util:fs_util",
+ "@com_google_absl//absl/strings",
+ gtest,
"//test/util:mount_util",
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_absl//absl/strings",
- "@com_google_googletest//:gtest",
],
)
@@ -455,9 +459,9 @@ cc_binary(
srcs = ["clock_getres.cc"],
linkstatic = 1,
deps = [
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -467,11 +471,11 @@ cc_binary(
srcs = ["clock_gettime.cc"],
linkstatic = 1,
deps = [
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
)
@@ -481,13 +485,13 @@ cc_binary(
srcs = ["concurrency.cc"],
linkstatic = 1,
deps = [
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:platform_util",
"//test/util:test_main",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_absl//absl/strings",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
)
@@ -500,9 +504,9 @@ cc_binary(
":socket_test_util",
"//test/util:file_descriptor",
"//test/util:fs_util",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -513,10 +517,10 @@ cc_binary(
linkstatic = 1,
deps = [
"//test/util:fs_util",
+ gtest,
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -527,9 +531,9 @@ cc_binary(
linkstatic = 1,
deps = [
"//test/util:file_descriptor",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -541,11 +545,11 @@ cc_binary(
deps = [
"//test/util:eventfd_util",
"//test/util:file_descriptor",
+ gtest,
"//test/util:posix_error",
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -558,10 +562,10 @@ cc_binary(
"//test/util:epoll_util",
"//test/util:eventfd_util",
"//test/util:file_descriptor",
+ gtest,
"//test/util:posix_error",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -573,10 +577,10 @@ cc_binary(
deps = [
"//test/util:epoll_util",
"//test/util:eventfd_util",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_googletest//:gtest",
],
)
@@ -586,12 +590,12 @@ cc_binary(
srcs = ["exceptions.cc"],
linkstatic = 1,
deps = [
+ gtest,
"//test/util:logging",
"//test/util:platform_util",
"//test/util:signal_util",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -601,10 +605,10 @@ cc_binary(
srcs = ["getcpu.cc"],
linkstatic = 1,
deps = [
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
)
@@ -614,10 +618,10 @@ cc_binary(
srcs = ["getcpu.cc"],
linkstatic = 1,
deps = [
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
)
@@ -627,33 +631,36 @@ cc_binary(
srcs = ["getrusage.cc"],
linkstatic = 1,
deps = [
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:logging",
"//test/util:memory_util",
"//test/util:signal_util",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
)
cc_binary(
name = "exec_binary_test",
testonly = 1,
- srcs = ["exec_binary.cc"],
+ srcs = select_arch(
+ amd64 = ["exec_binary.cc"],
+ arm64 = [],
+ ),
linkstatic = 1,
deps = [
"//test/util:cleanup",
"//test/util:file_descriptor",
"//test/util:fs_util",
+ "@com_google_absl//absl/strings",
+ gtest,
"//test/util:multiprocess_util",
"//test/util:posix_error",
"//test/util:proc_util",
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_absl//absl/strings",
- "@com_google_googletest//:gtest",
],
)
@@ -676,15 +683,15 @@ cc_binary(
deps = [
"//test/util:file_descriptor",
"//test/util:fs_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/synchronization",
+ "@com_google_absl//absl/types:optional",
+ gtest,
"//test/util:multiprocess_util",
"//test/util:posix_error",
"//test/util:temp_path",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_absl//absl/strings",
- "@com_google_absl//absl/synchronization",
- "@com_google_absl//absl/types:optional",
- "@com_google_googletest//:gtest",
],
)
@@ -695,11 +702,11 @@ cc_binary(
linkstatic = 1,
deps = [
"//test/util:file_descriptor",
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
"//test/util:time_util",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
)
@@ -712,10 +719,10 @@ cc_binary(
":file_base",
"//test/util:cleanup",
"//test/util:file_descriptor",
+ gtest,
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -725,9 +732,9 @@ cc_binary(
srcs = ["fault.cc"],
linkstatic = 1,
deps = [
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -738,10 +745,10 @@ cc_binary(
linkstatic = 1,
deps = [
"//test/util:capability_util",
+ gtest,
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -755,18 +762,18 @@ cc_binary(
"//test/util:cleanup",
"//test/util:eventfd_util",
"//test/util:fs_util",
+ "@com_google_absl//absl/base:core_headers",
+ "@com_google_absl//absl/flags:flag",
+ "@com_google_absl//absl/memory",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:multiprocess_util",
"//test/util:posix_error",
"//test/util:save_util",
"//test/util:temp_path",
"//test/util:test_util",
"//test/util:timer_util",
- "@com_google_absl//absl/base:core_headers",
- "@com_google_absl//absl/flags:flag",
- "@com_google_absl//absl/memory",
- "@com_google_absl//absl/strings",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
)
@@ -780,15 +787,15 @@ cc_binary(
linkstatic = 1,
deps = [
"//test/util:file_descriptor",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:posix_error",
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
"//test/util:thread_util",
"//test/util:timer_util",
- "@com_google_absl//absl/strings",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
)
@@ -799,40 +806,46 @@ cc_binary(
linkstatic = 1,
deps = [
"//test/util:capability_util",
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:logging",
"//test/util:memory_util",
"//test/util:test_main",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
)
cc_binary(
name = "fpsig_fork_test",
testonly = 1,
- srcs = ["fpsig_fork.cc"],
+ srcs = select_arch(
+ amd64 = ["fpsig_fork.cc"],
+ arm64 = [],
+ ),
linkstatic = 1,
deps = [
+ gtest,
"//test/util:logging",
"//test/util:test_main",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_googletest//:gtest",
],
)
cc_binary(
name = "fpsig_nested_test",
testonly = 1,
- srcs = ["fpsig_nested.cc"],
+ srcs = select_arch(
+ amd64 = ["fpsig_nested.cc"],
+ arm64 = [],
+ ),
linkstatic = 1,
deps = [
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_googletest//:gtest",
],
)
@@ -843,10 +856,10 @@ cc_binary(
linkstatic = 1,
deps = [
"//test/util:file_descriptor",
+ gtest,
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -857,10 +870,10 @@ cc_binary(
linkstatic = 1,
deps = [
"//test/util:file_descriptor",
+ gtest,
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -872,6 +885,9 @@ cc_binary(
deps = [
"//test/util:cleanup",
"//test/util:file_descriptor",
+ "@com_google_absl//absl/memory",
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:memory_util",
"//test/util:save_util",
"//test/util:temp_path",
@@ -880,9 +896,6 @@ cc_binary(
"//test/util:thread_util",
"//test/util:time_util",
"//test/util:timer_util",
- "@com_google_absl//absl/memory",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
)
@@ -895,12 +908,12 @@ cc_binary(
"//test/util:eventfd_util",
"//test/util:file_descriptor",
"//test/util:fs_util",
+ "@com_google_absl//absl/strings",
+ gtest,
"//test/util:posix_error",
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_absl//absl/strings",
- "@com_google_googletest//:gtest",
],
)
@@ -910,9 +923,9 @@ cc_binary(
srcs = ["getrandom.cc"],
linkstatic = 1,
deps = [
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -945,10 +958,10 @@ cc_binary(
":socket_test_util",
":unix_domain_socket_test_util",
"//test/util:file_descriptor",
+ gtest,
"//test/util:signal_util",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -972,9 +985,9 @@ cc_binary(
":socket_test_util",
"//test/util:capability_util",
"//test/util:file_descriptor",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -985,6 +998,9 @@ cc_binary(
linkstatic = 1,
deps = [
"//test/util:file_descriptor",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:logging",
"//test/util:multiprocess_util",
"//test/util:posix_error",
@@ -992,9 +1008,6 @@ cc_binary(
"//test/util:test_util",
"//test/util:thread_util",
"//test/util:timer_util",
- "@com_google_absl//absl/strings",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
)
@@ -1006,15 +1019,15 @@ cc_binary(
deps = [
"//test/util:capability_util",
"//test/util:file_descriptor",
+ "@com_google_absl//absl/flags:flag",
+ "@com_google_absl//absl/synchronization",
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:logging",
"//test/util:signal_util",
"//test/util:test_main",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_absl//absl/flags:flag",
- "@com_google_absl//absl/synchronization",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
)
@@ -1027,14 +1040,14 @@ cc_binary(
"//test/util:capability_util",
"//test/util:file_descriptor",
"//test/util:fs_util",
+ "@com_google_absl//absl/flags:flag",
+ "@com_google_absl//absl/strings",
+ gtest,
"//test/util:posix_error",
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_absl//absl/flags:flag",
- "@com_google_absl//absl/strings",
- "@com_google_googletest//:gtest",
],
)
@@ -1045,10 +1058,10 @@ cc_binary(
linkstatic = 1,
deps = [
"//test/util:file_descriptor",
+ gtest,
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -1059,6 +1072,7 @@ cc_binary(
linkstatic = 1,
deps = [
"//test/util:file_descriptor",
+ gtest,
"//test/util:logging",
"//test/util:memory_util",
"//test/util:multiprocess_util",
@@ -1066,7 +1080,6 @@ cc_binary(
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -1077,12 +1090,12 @@ cc_binary(
linkstatic = 1,
deps = [
"//test/util:cleanup",
+ "@com_google_absl//absl/memory",
+ gtest,
"//test/util:memory_util",
"//test/util:test_main",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_absl//absl/memory",
- "@com_google_googletest//:gtest",
],
)
@@ -1092,11 +1105,11 @@ cc_binary(
srcs = ["mincore.cc"],
linkstatic = 1,
deps = [
+ gtest,
"//test/util:memory_util",
"//test/util:posix_error",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -1109,10 +1122,10 @@ cc_binary(
":temp_umask",
"//test/util:capability_util",
"//test/util:fs_util",
+ gtest,
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -1123,11 +1136,11 @@ cc_binary(
linkstatic = 1,
deps = [
"//test/util:file_descriptor",
+ gtest,
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_googletest//:gtest",
],
)
@@ -1139,12 +1152,12 @@ cc_binary(
deps = [
"//test/util:capability_util",
"//test/util:cleanup",
+ gtest,
"//test/util:memory_util",
"//test/util:multiprocess_util",
"//test/util:rlimit_util",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -1157,13 +1170,13 @@ cc_binary(
"//test/util:cleanup",
"//test/util:file_descriptor",
"//test/util:fs_util",
+ "@com_google_absl//absl/strings",
+ gtest,
"//test/util:memory_util",
"//test/util:multiprocess_util",
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_absl//absl/strings",
- "@com_google_googletest//:gtest",
],
)
@@ -1176,6 +1189,9 @@ cc_binary(
"//test/util:capability_util",
"//test/util:file_descriptor",
"//test/util:fs_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:mount_util",
"//test/util:multiprocess_util",
"//test/util:posix_error",
@@ -1183,9 +1199,6 @@ cc_binary(
"//test/util:test_main",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_absl//absl/strings",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
)
@@ -1195,10 +1208,9 @@ cc_binary(
srcs = ["mremap.cc"],
linkstatic = 1,
deps = [
- # The heap check fails due to MremapDeathTest
- "@com_google_googletest//:gtest",
- "@com_google_absl//absl/strings",
"//test/util:file_descriptor",
+ "@com_google_absl//absl/strings",
+ gtest,
"//test/util:logging",
"//test/util:memory_util",
"//test/util:multiprocess_util",
@@ -1230,9 +1242,9 @@ cc_binary(
srcs = ["munmap.cc"],
linkstatic = 1,
deps = [
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -1249,14 +1261,14 @@ cc_binary(
"//test/util:cleanup",
"//test/util:file_descriptor",
"//test/util:fs_util",
+ "@com_google_absl//absl/memory",
+ "@com_google_absl//absl/strings",
+ gtest,
"//test/util:posix_error",
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_absl//absl/memory",
- "@com_google_absl//absl/strings",
- "@com_google_googletest//:gtest",
],
)
@@ -1270,10 +1282,10 @@ cc_binary(
"//test/util:capability_util",
"//test/util:file_descriptor",
"//test/util:fs_util",
+ gtest,
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -1287,11 +1299,11 @@ cc_binary(
":unix_domain_socket_test_util",
"//test/util:capability_util",
"//test/util:file_descriptor",
- "//test/util:test_main",
- "//test/util:test_util",
"@com_google_absl//absl/base:core_headers",
"@com_google_absl//absl/base:endian",
- "@com_google_googletest//:gtest",
+ gtest,
+ "//test/util:test_main",
+ "//test/util:test_util",
],
)
@@ -1305,11 +1317,11 @@ cc_binary(
":unix_domain_socket_test_util",
"//test/util:capability_util",
"//test/util:file_descriptor",
- "//test/util:test_main",
- "//test/util:test_util",
"@com_google_absl//absl/base:core_headers",
"@com_google_absl//absl/base:endian",
- "@com_google_googletest//:gtest",
+ gtest,
+ "//test/util:test_main",
+ "//test/util:test_util",
],
)
@@ -1321,16 +1333,16 @@ cc_binary(
deps = [
"//test/util:capability_util",
"//test/util:file_descriptor",
+ "@com_google_absl//absl/base:core_headers",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/synchronization",
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:posix_error",
"//test/util:pty_util",
"//test/util:test_main",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_absl//absl/base:core_headers",
- "@com_google_absl//absl/strings",
- "@com_google_absl//absl/synchronization",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
)
@@ -1342,12 +1354,12 @@ cc_binary(
deps = [
"//test/util:capability_util",
"//test/util:file_descriptor",
+ "@com_google_absl//absl/base:core_headers",
+ gtest,
"//test/util:posix_error",
"//test/util:pty_util",
"//test/util:test_main",
"//test/util:thread_util",
- "@com_google_absl//absl/base:core_headers",
- "@com_google_googletest//:gtest",
],
)
@@ -1360,12 +1372,12 @@ cc_binary(
"//test/syscalls/linux:socket_test_util",
"//test/util:file_descriptor",
"//test/util:fs_util",
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:posix_error",
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
)
@@ -1375,13 +1387,13 @@ cc_binary(
srcs = ["pause.cc"],
linkstatic = 1,
deps = [
+ "@com_google_absl//absl/synchronization",
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:signal_util",
"//test/util:test_main",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_absl//absl/synchronization",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
)
@@ -1393,15 +1405,15 @@ cc_binary(
deps = [
"//test/util:file_descriptor",
"//test/util:fs_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/synchronization",
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:posix_error",
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_absl//absl/strings",
- "@com_google_absl//absl/synchronization",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
)
@@ -1414,13 +1426,13 @@ cc_binary(
":base_poll_test",
"//test/util:eventfd_util",
"//test/util:file_descriptor",
+ "@com_google_absl//absl/synchronization",
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:logging",
"//test/util:test_main",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_absl//absl/synchronization",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
)
@@ -1431,11 +1443,11 @@ cc_binary(
linkstatic = 1,
deps = [
":base_poll_test",
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:signal_util",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
)
@@ -1446,9 +1458,9 @@ cc_binary(
linkstatic = 1,
deps = [
"//test/util:file_descriptor",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -1460,12 +1472,12 @@ cc_binary(
deps = [
"//test/util:capability_util",
"//test/util:cleanup",
+ "@com_google_absl//absl/flags:flag",
+ gtest,
"//test/util:multiprocess_util",
"//test/util:posix_error",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_absl//absl/flags:flag",
- "@com_google_googletest//:gtest",
],
)
@@ -1476,13 +1488,13 @@ cc_binary(
linkstatic = 1,
deps = [
"//test/util:capability_util",
+ "@com_google_absl//absl/flags:flag",
+ gtest,
"//test/util:logging",
"//test/util:multiprocess_util",
"//test/util:posix_error",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_absl//absl/flags:flag",
- "@com_google_googletest//:gtest",
],
)
@@ -1493,10 +1505,10 @@ cc_binary(
linkstatic = 1,
deps = [
"//test/util:file_descriptor",
+ gtest,
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -1507,6 +1519,8 @@ cc_binary(
linkstatic = 1,
deps = [
"//test/util:file_descriptor",
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:logging",
"//test/util:memory_util",
"//test/util:temp_path",
@@ -1514,8 +1528,6 @@ cc_binary(
"//test/util:test_util",
"//test/util:thread_util",
"//test/util:timer_util",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
)
@@ -1529,13 +1541,13 @@ cc_binary(
linkstatic = 1,
deps = [
"//test/util:file_descriptor",
+ "@com_google_absl//absl/memory",
+ "@com_google_absl//absl/strings",
+ gtest,
"//test/util:posix_error",
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_absl//absl/memory",
- "@com_google_absl//absl/strings",
- "@com_google_googletest//:gtest",
],
)
@@ -1547,11 +1559,11 @@ cc_binary(
deps = [
"//test/util:capability_util",
"//test/util:fs_util",
+ "@com_google_absl//absl/strings",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_absl//absl/strings",
- "@com_google_googletest//:gtest",
],
)
@@ -1565,6 +1577,10 @@ cc_binary(
"//test/util:cleanup",
"//test/util:file_descriptor",
"//test/util:fs_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/synchronization",
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:memory_util",
"//test/util:posix_error",
"//test/util:temp_path",
@@ -1572,10 +1588,6 @@ cc_binary(
"//test/util:thread_util",
"//test/util:time_util",
"//test/util:timer_util",
- "@com_google_absl//absl/strings",
- "@com_google_absl//absl/synchronization",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
)
@@ -1589,11 +1601,11 @@ cc_binary(
"//test/util:capability_util",
"//test/util:file_descriptor",
"//test/util:fs_util",
- "//test/util:test_main",
- "//test/util:test_util",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
+ gtest,
+ "//test/util:test_main",
+ "//test/util:test_util",
],
)
@@ -1605,17 +1617,17 @@ cc_binary(
deps = [
"//test/util:file_descriptor",
"//test/util:fs_util",
+ "@com_google_absl//absl/container:flat_hash_set",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/strings:str_format",
+ "@com_google_absl//absl/types:optional",
+ gtest,
"//test/util:memory_util",
"//test/util:posix_error",
"//test/util:proc_util",
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_absl//absl/container:flat_hash_set",
- "@com_google_absl//absl/strings",
- "@com_google_absl//absl/strings:str_format",
- "@com_google_absl//absl/types:optional",
- "@com_google_googletest//:gtest",
],
)
@@ -1629,6 +1641,8 @@ cc_binary(
"//test/util:cleanup",
"//test/util:file_descriptor",
"//test/util:fs_util",
+ "@com_google_absl//absl/strings",
+ gtest,
"//test/util:logging",
"//test/util:multiprocess_util",
"//test/util:posix_error",
@@ -1636,8 +1650,6 @@ cc_binary(
"//test/util:test_main",
"//test/util:test_util",
"//test/util:time_util",
- "@com_google_absl//absl/strings",
- "@com_google_googletest//:gtest",
],
)
@@ -1648,11 +1660,11 @@ cc_binary(
linkstatic = 1,
deps = [
":base_poll_test",
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:signal_util",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
)
@@ -1662,6 +1674,9 @@ cc_binary(
srcs = ["ptrace.cc"],
linkstatic = 1,
deps = [
+ "@com_google_absl//absl/flags:flag",
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:logging",
"//test/util:multiprocess_util",
"//test/util:platform_util",
@@ -1669,9 +1684,6 @@ cc_binary(
"//test/util:test_util",
"//test/util:thread_util",
"//test/util:time_util",
- "@com_google_absl//absl/flags:flag",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
)
@@ -1681,10 +1693,10 @@ cc_binary(
srcs = ["pwrite64.cc"],
linkstatic = 1,
deps = [
+ gtest,
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -1698,12 +1710,12 @@ cc_binary(
deps = [
":file_base",
"//test/util:file_descriptor",
+ "@com_google_absl//absl/strings",
+ gtest,
"//test/util:posix_error",
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_absl//absl/strings",
- "@com_google_googletest//:gtest",
],
)
@@ -1717,11 +1729,11 @@ cc_binary(
":unix_domain_socket_test_util",
"//test/util:capability_util",
"//test/util:file_descriptor",
- "//test/util:test_main",
- "//test/util:test_util",
"@com_google_absl//absl/base:core_headers",
"@com_google_absl//absl/base:endian",
- "@com_google_googletest//:gtest",
+ gtest,
+ "//test/util:test_main",
+ "//test/util:test_util",
],
)
@@ -1735,10 +1747,10 @@ cc_binary(
":unix_domain_socket_test_util",
"//test/util:capability_util",
"//test/util:file_descriptor",
+ "@com_google_absl//absl/base:core_headers",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_absl//absl/base:core_headers",
- "@com_google_googletest//:gtest",
],
)
@@ -1752,10 +1764,10 @@ cc_binary(
":unix_domain_socket_test_util",
"//test/util:capability_util",
"//test/util:file_descriptor",
+ "@com_google_absl//absl/base:core_headers",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_absl//absl/base:core_headers",
- "@com_google_googletest//:gtest",
],
)
@@ -1766,10 +1778,10 @@ cc_binary(
linkstatic = 1,
deps = [
"//test/util:file_descriptor",
+ gtest,
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -1780,10 +1792,10 @@ cc_binary(
linkstatic = 1,
deps = [
"//test/util:file_descriptor",
+ gtest,
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -1799,13 +1811,13 @@ cc_binary(
linkstatic = 1,
deps = [
"//test/util:file_descriptor",
+ "@com_google_absl//absl/strings",
+ gtest,
"//test/util:posix_error",
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
"//test/util:timer_util",
- "@com_google_absl//absl/strings",
- "@com_google_googletest//:gtest",
],
)
@@ -1820,12 +1832,12 @@ cc_binary(
linkstatic = 1,
deps = [
"//test/util:file_descriptor",
+ "@com_google_absl//absl/strings",
+ gtest,
"//test/util:posix_error",
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_absl//absl/strings",
- "@com_google_googletest//:gtest",
],
)
@@ -1839,11 +1851,11 @@ cc_binary(
"//test/util:cleanup",
"//test/util:file_descriptor",
"//test/util:fs_util",
+ "@com_google_absl//absl/strings",
+ gtest,
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_absl//absl/strings",
- "@com_google_googletest//:gtest",
],
)
@@ -1867,11 +1879,11 @@ cc_binary(
linkstatic = 1,
deps = [
"//test/syscalls/linux/rseq:lib",
+ gtest,
"//test/util:logging",
"//test/util:multiprocess_util",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -1882,11 +1894,11 @@ cc_binary(
linkstatic = 1,
deps = [
"//test/util:cleanup",
+ gtest,
"//test/util:logging",
"//test/util:posix_error",
"//test/util:signal_util",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -1896,9 +1908,9 @@ cc_binary(
srcs = ["sched.cc"],
linkstatic = 1,
deps = [
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -1908,9 +1920,9 @@ cc_binary(
srcs = ["sched_yield.cc"],
linkstatic = 1,
deps = [
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -1920,6 +1932,8 @@ cc_binary(
srcs = ["seccomp.cc"],
linkstatic = 1,
deps = [
+ "@com_google_absl//absl/base:core_headers",
+ gtest,
"//test/util:logging",
"//test/util:memory_util",
"//test/util:multiprocess_util",
@@ -1927,8 +1941,6 @@ cc_binary(
"//test/util:proc_util",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_absl//absl/base:core_headers",
- "@com_google_googletest//:gtest",
],
)
@@ -1940,14 +1952,14 @@ cc_binary(
deps = [
":base_poll_test",
"//test/util:file_descriptor",
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:multiprocess_util",
"//test/util:posix_error",
"//test/util:rlimit_util",
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
)
@@ -1959,13 +1971,13 @@ cc_binary(
deps = [
"//test/util:eventfd_util",
"//test/util:file_descriptor",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_absl//absl/strings",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
)
@@ -1977,12 +1989,12 @@ cc_binary(
deps = [
":socket_test_util",
"//test/util:file_descriptor",
+ "@com_google_absl//absl/strings",
+ gtest,
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_absl//absl/strings",
- "@com_google_googletest//:gtest",
],
)
@@ -1993,13 +2005,13 @@ cc_binary(
linkstatic = 1,
deps = [
"//test/util:file_descriptor",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_absl//absl/strings",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
)
@@ -2009,9 +2021,9 @@ cc_binary(
srcs = ["sigaction.cc"],
linkstatic = 1,
deps = [
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -2026,28 +2038,34 @@ cc_binary(
deps = [
"//test/util:cleanup",
"//test/util:fs_util",
+ gtest,
"//test/util:multiprocess_util",
"//test/util:posix_error",
"//test/util:signal_util",
"//test/util:test_main",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_googletest//:gtest",
],
)
cc_binary(
name = "sigiret_test",
testonly = 1,
- srcs = ["sigiret.cc"],
+ srcs = select_arch(
+ amd64 = ["sigiret.cc"],
+ arm64 = [],
+ ),
linkstatic = 1,
deps = [
+ gtest,
"//test/util:logging",
"//test/util:signal_util",
"//test/util:test_util",
"//test/util:timer_util",
- "@com_google_googletest//:gtest",
- ],
+ ] + select_arch(
+ amd64 = [],
+ arm64 = ["//test/util:test_main"],
+ ),
)
cc_binary(
@@ -2057,14 +2075,14 @@ cc_binary(
linkstatic = 1,
deps = [
"//test/util:file_descriptor",
+ "@com_google_absl//absl/synchronization",
+ gtest,
"//test/util:logging",
"//test/util:posix_error",
"//test/util:signal_util",
"//test/util:test_main",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_absl//absl/synchronization",
- "@com_google_googletest//:gtest",
],
)
@@ -2074,10 +2092,10 @@ cc_binary(
srcs = ["sigprocmask.cc"],
linkstatic = 1,
deps = [
+ gtest,
"//test/util:signal_util",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -2087,13 +2105,13 @@ cc_binary(
srcs = ["sigstop.cc"],
linkstatic = 1,
deps = [
+ "@com_google_absl//absl/flags:flag",
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:multiprocess_util",
"//test/util:posix_error",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_absl//absl/flags:flag",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
)
@@ -2104,13 +2122,13 @@ cc_binary(
linkstatic = 1,
deps = [
"//test/util:file_descriptor",
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:logging",
"//test/util:signal_util",
"//test/util:test_util",
"//test/util:thread_util",
"//test/util:timer_util",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
)
@@ -2126,14 +2144,30 @@ cc_library(
deps = [
":socket_test_util",
":unix_domain_socket_test_util",
- "//test/util:test_util",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/strings:str_format",
- "@com_google_googletest//:gtest",
+ gtest,
+ "//test/util:test_util",
],
alwayslink = 1,
)
+cc_binary(
+ name = "socket_stress_test",
+ testonly = 1,
+ srcs = [
+ "socket_generic_stress.cc",
+ ],
+ linkstatic = 1,
+ deps = [
+ ":ip_socket_test_util",
+ ":socket_test_util",
+ gtest,
+ "//test/util:test_main",
+ "//test/util:test_util",
+ ],
+)
+
cc_library(
name = "socket_unix_dgram_test_cases",
testonly = 1,
@@ -2142,8 +2176,8 @@ cc_library(
deps = [
":socket_test_util",
":unix_domain_socket_test_util",
+ gtest,
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
alwayslink = 1,
)
@@ -2156,8 +2190,8 @@ cc_library(
deps = [
":socket_test_util",
":unix_domain_socket_test_util",
+ gtest,
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
alwayslink = 1,
)
@@ -2173,11 +2207,11 @@ cc_library(
],
deps = [
":socket_test_util",
- "//test/util:test_util",
- "//test/util:thread_util",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
+ gtest,
+ "//test/util:test_util",
+ "//test/util:thread_util",
],
alwayslink = 1,
)
@@ -2194,8 +2228,8 @@ cc_library(
deps = [
":socket_test_util",
":unix_domain_socket_test_util",
+ gtest,
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
alwayslink = 1,
)
@@ -2212,9 +2246,9 @@ cc_library(
deps = [
":socket_test_util",
":unix_domain_socket_test_util",
+ gtest,
"//test/util:memory_util",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
alwayslink = 1,
)
@@ -2232,8 +2266,8 @@ cc_library(
":ip_socket_test_util",
":socket_test_util",
":unix_domain_socket_test_util",
+ gtest,
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
alwayslink = 1,
)
@@ -2250,8 +2284,8 @@ cc_library(
deps = [
":ip_socket_test_util",
":socket_test_util",
+ gtest,
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
alwayslink = 1,
)
@@ -2268,9 +2302,9 @@ cc_library(
deps = [
":ip_socket_test_util",
":socket_test_util",
- "//test/util:test_util",
"@com_google_absl//absl/memory",
- "@com_google_googletest//:gtest",
+ gtest,
+ "//test/util:test_util",
],
alwayslink = 1,
)
@@ -2287,8 +2321,8 @@ cc_library(
deps = [
":ip_socket_test_util",
":socket_test_util",
+ gtest,
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
alwayslink = 1,
)
@@ -2305,8 +2339,8 @@ cc_library(
deps = [
":ip_socket_test_util",
":socket_test_util",
+ gtest,
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
alwayslink = 1,
)
@@ -2369,9 +2403,9 @@ cc_binary(
deps = [
":socket_test_util",
":unix_domain_socket_test_util",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -2401,9 +2435,9 @@ cc_binary(
deps = [
":socket_test_util",
":unix_domain_socket_test_util",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -2433,9 +2467,9 @@ cc_binary(
deps = [
":ip_socket_test_util",
":socket_test_util",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -2533,10 +2567,10 @@ cc_binary(
":socket_bind_to_device_util",
":socket_test_util",
"//test/util:capability_util",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_googletest//:gtest",
],
)
@@ -2552,10 +2586,10 @@ cc_binary(
":socket_bind_to_device_util",
":socket_test_util",
"//test/util:capability_util",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_googletest//:gtest",
],
)
@@ -2571,10 +2605,10 @@ cc_binary(
":socket_bind_to_device_util",
":socket_test_util",
"//test/util:capability_util",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_googletest//:gtest",
],
)
@@ -2620,9 +2654,9 @@ cc_binary(
deps = [
":ip_socket_test_util",
":socket_test_util",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -2701,15 +2735,15 @@ cc_binary(
":ip_socket_test_util",
":socket_test_util",
"//test/util:file_descriptor",
+ "@com_google_absl//absl/memory",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:posix_error",
"//test/util:save_util",
"//test/util:test_main",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_absl//absl/memory",
- "@com_google_absl//absl/strings",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
)
@@ -2721,9 +2755,9 @@ cc_binary(
deps = [
":socket_test_util",
"//test/util:file_descriptor",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -2735,12 +2769,14 @@ cc_binary(
deps = [
":socket_netlink_util",
":socket_test_util",
+ "//test/util:capability_util",
"//test/util:cleanup",
"//test/util:file_descriptor",
+ "@com_google_absl//absl/strings:str_format",
+ "@com_google_absl//absl/types:optional",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_absl//absl/strings:str_format",
- "@com_google_googletest//:gtest",
],
)
@@ -2753,9 +2789,9 @@ cc_binary(
":socket_netlink_util",
":socket_test_util",
"//test/util:file_descriptor",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -2773,9 +2809,9 @@ cc_library(
deps = [
":socket_test_util",
":unix_domain_socket_test_util",
- "//test/util:test_util",
"@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
+ gtest,
+ "//test/util:test_util",
],
alwayslink = 1,
)
@@ -2792,11 +2828,11 @@ cc_library(
deps = [
":socket_test_util",
":unix_domain_socket_test_util",
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:test_util",
"//test/util:thread_util",
"//test/util:timer_util",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
alwayslink = 1,
)
@@ -2813,10 +2849,10 @@ cc_library(
deps = [
":socket_test_util",
":unix_domain_socket_test_util",
+ "@com_google_absl//absl/strings",
+ gtest,
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_absl//absl/strings",
- "@com_google_googletest//:gtest",
],
alwayslink = 1,
)
@@ -2833,10 +2869,10 @@ cc_library(
deps = [
":socket_test_util",
":unix_domain_socket_test_util",
+ "@com_google_absl//absl/strings",
+ gtest,
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_absl//absl/strings",
- "@com_google_googletest//:gtest",
],
alwayslink = 1,
)
@@ -2853,11 +2889,11 @@ cc_library(
deps = [
":socket_test_util",
":unix_domain_socket_test_util",
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:test_util",
"//test/util:thread_util",
"//test/util:timer_util",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
alwayslink = 1,
)
@@ -2874,8 +2910,8 @@ cc_library(
deps = [
":socket_test_util",
":unix_domain_socket_test_util",
+ gtest,
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
alwayslink = 1,
)
@@ -2892,10 +2928,10 @@ cc_library(
deps = [
":socket_test_util",
":unix_domain_socket_test_util",
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
alwayslink = 1,
)
@@ -2989,9 +3025,9 @@ cc_binary(
deps = [
":socket_test_util",
":unix_domain_socket_test_util",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -3003,9 +3039,9 @@ cc_binary(
deps = [
":socket_test_util",
":unix_domain_socket_test_util",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -3017,9 +3053,9 @@ cc_binary(
deps = [
":socket_test_util",
":unix_domain_socket_test_util",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -3034,9 +3070,9 @@ cc_binary(
":socket_blocking_test_cases",
":socket_test_util",
":unix_domain_socket_test_util",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -3051,9 +3087,9 @@ cc_binary(
":ip_socket_test_util",
":socket_blocking_test_cases",
":socket_test_util",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -3068,9 +3104,9 @@ cc_binary(
":socket_non_stream_blocking_test_cases",
":socket_test_util",
":unix_domain_socket_test_util",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -3085,9 +3121,9 @@ cc_binary(
":ip_socket_test_util",
":socket_non_stream_blocking_test_cases",
":socket_test_util",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -3103,9 +3139,9 @@ cc_binary(
":socket_unix_cmsg_test_cases",
":socket_unix_test_cases",
":unix_domain_socket_test_util",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -3117,9 +3153,9 @@ cc_binary(
deps = [
":socket_test_util",
":unix_domain_socket_test_util",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -3131,9 +3167,9 @@ cc_binary(
deps = [
":socket_test_util",
":unix_domain_socket_test_util",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -3146,10 +3182,10 @@ cc_binary(
":socket_netlink_util",
":socket_test_util",
"//test/util:file_descriptor",
+ "@com_google_absl//absl/base:endian",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_absl//absl/base:endian",
- "@com_google_googletest//:gtest",
],
)
@@ -3165,12 +3201,12 @@ cc_binary(
"//test/util:cleanup",
"//test/util:file_descriptor",
"//test/util:fs_util",
+ "@com_google_absl//absl/strings",
+ gtest,
"//test/util:posix_error",
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_absl//absl/strings",
- "@com_google_googletest//:gtest",
],
)
@@ -3181,11 +3217,11 @@ cc_binary(
linkstatic = 1,
deps = [
"//test/util:file_descriptor",
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
)
@@ -3199,12 +3235,12 @@ cc_binary(
linkstatic = 1,
deps = [
"//test/util:file_descriptor",
+ "@com_google_absl//absl/strings",
+ gtest,
"//test/util:posix_error",
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_absl//absl/strings",
- "@com_google_googletest//:gtest",
],
)
@@ -3217,10 +3253,10 @@ cc_binary(
"//test/util:capability_util",
"//test/util:file_descriptor",
"//test/util:fs_util",
+ gtest,
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -3230,10 +3266,10 @@ cc_binary(
srcs = ["sync.cc"],
linkstatic = 1,
deps = [
+ gtest,
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -3243,10 +3279,10 @@ cc_binary(
srcs = ["sysinfo.cc"],
linkstatic = 1,
deps = [
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
)
@@ -3256,9 +3292,9 @@ cc_binary(
srcs = ["syslog.cc"],
linkstatic = 1,
deps = [
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -3268,10 +3304,10 @@ cc_binary(
srcs = ["sysret.cc"],
linkstatic = 1,
deps = [
+ gtest,
"//test/util:logging",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -3283,12 +3319,12 @@ cc_binary(
deps = [
":socket_test_util",
"//test/util:file_descriptor",
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:posix_error",
"//test/util:test_main",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
)
@@ -3298,11 +3334,11 @@ cc_binary(
srcs = ["tgkill.cc"],
linkstatic = 1,
deps = [
+ gtest,
"//test/util:signal_util",
"//test/util:test_main",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_googletest//:gtest",
],
)
@@ -3312,10 +3348,10 @@ cc_binary(
srcs = ["time.cc"],
linkstatic = 1,
deps = [
+ gtest,
"//test/util:proc_util",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -3340,15 +3376,15 @@ cc_binary(
linkstatic = 1,
deps = [
"//test/util:cleanup",
+ "@com_google_absl//absl/flags:flag",
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:logging",
"//test/util:multiprocess_util",
"//test/util:posix_error",
"//test/util:signal_util",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_absl//absl/flags:flag",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
)
@@ -3358,11 +3394,11 @@ cc_binary(
srcs = ["tkill.cc"],
linkstatic = 1,
deps = [
+ gtest,
"//test/util:logging",
"//test/util:test_main",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_googletest//:gtest",
],
)
@@ -3376,11 +3412,11 @@ cc_binary(
"//test/util:capability_util",
"//test/util:cleanup",
"//test/util:file_descriptor",
+ "@com_google_absl//absl/strings",
+ gtest,
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_absl//absl/strings",
- "@com_google_googletest//:gtest",
],
)
@@ -3396,12 +3432,12 @@ cc_library(
deps = [
":socket_test_util",
":unix_domain_socket_test_util",
+ "@com_google_absl//absl/base:core_headers",
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_absl//absl/base:core_headers",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
alwayslink = 1,
)
@@ -3424,9 +3460,9 @@ cc_binary(
deps = [
":socket_test_util",
"//test/util:file_descriptor",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -3437,14 +3473,14 @@ cc_binary(
linkstatic = 1,
deps = [
"//test/util:capability_util",
+ "@com_google_absl//absl/flags:flag",
+ "@com_google_absl//absl/strings",
+ gtest,
"//test/util:posix_error",
"//test/util:test_main",
"//test/util:test_util",
"//test/util:thread_util",
"//test/util:uid_util",
- "@com_google_absl//absl/flags:flag",
- "@com_google_absl//absl/strings",
- "@com_google_googletest//:gtest",
],
)
@@ -3455,11 +3491,11 @@ cc_binary(
linkstatic = 1,
deps = [
"//test/util:capability_util",
+ "@com_google_absl//absl/strings",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_absl//absl/strings",
- "@com_google_googletest//:gtest",
],
)
@@ -3472,11 +3508,11 @@ cc_binary(
"//test/util:capability_util",
"//test/util:file_descriptor",
"//test/util:fs_util",
+ "@com_google_absl//absl/strings",
+ gtest,
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_absl//absl/strings",
- "@com_google_googletest//:gtest",
],
)
@@ -3486,11 +3522,11 @@ cc_binary(
srcs = ["unshare.cc"],
linkstatic = 1,
deps = [
+ "@com_google_absl//absl/synchronization",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
"//test/util:thread_util",
- "@com_google_absl//absl/synchronization",
- "@com_google_googletest//:gtest",
],
)
@@ -3516,11 +3552,11 @@ cc_binary(
linkstatic = 1,
deps = [
"//test/util:fs_util",
+ gtest,
"//test/util:posix_error",
"//test/util:proc_util",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -3530,13 +3566,13 @@ cc_binary(
srcs = ["vfork.cc"],
linkstatic = 1,
deps = [
+ "@com_google_absl//absl/flags:flag",
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:logging",
"//test/util:multiprocess_util",
"//test/util:test_util",
"//test/util:time_util",
- "@com_google_absl//absl/flags:flag",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
)
@@ -3548,6 +3584,10 @@ cc_binary(
deps = [
"//test/util:cleanup",
"//test/util:file_descriptor",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/synchronization",
+ "@com_google_absl//absl/time",
+ gtest,
"//test/util:logging",
"//test/util:multiprocess_util",
"//test/util:posix_error",
@@ -3556,10 +3596,6 @@ cc_binary(
"//test/util:test_util",
"//test/util:thread_util",
"//test/util:time_util",
- "@com_google_absl//absl/strings",
- "@com_google_absl//absl/synchronization",
- "@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
],
)
@@ -3570,10 +3606,10 @@ cc_binary(
linkstatic = 1,
deps = [
"//test/util:cleanup",
+ gtest,
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -3584,12 +3620,12 @@ cc_binary(
linkstatic = 1,
deps = [
"//test/util:fs_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/strings:str_format",
+ gtest,
"//test/util:posix_error",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_absl//absl/strings",
- "@com_google_absl//absl/strings:str_format",
- "@com_google_googletest//:gtest",
],
)
@@ -3600,14 +3636,14 @@ cc_binary(
linkstatic = 1,
deps = [
"//test/util:capability_util",
- "//test/util:test_main",
- "//test/util:test_util",
- "//test/util:thread_util",
"@com_google_absl//absl/base:core_headers",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/synchronization",
"@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
+ gtest,
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "//test/util:thread_util",
],
)
@@ -3633,10 +3669,10 @@ cc_binary(
linkstatic = 1,
deps = [
"//test/util:file_descriptor",
+ gtest,
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -3646,11 +3682,11 @@ cc_binary(
srcs = ["vdso_clock_gettime.cc"],
linkstatic = 1,
deps = [
- "//test/util:test_main",
- "//test/util:test_util",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
+ gtest,
+ "//test/util:test_main",
+ "//test/util:test_util",
],
)
@@ -3660,10 +3696,10 @@ cc_binary(
srcs = ["vsyscall.cc"],
linkstatic = 1,
deps = [
+ gtest,
"//test/util:proc_util",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -3676,11 +3712,11 @@ cc_binary(
":unix_domain_socket_test_util",
"//test/util:file_descriptor",
"//test/util:fs_util",
- "//test/util:test_main",
- "//test/util:test_util",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/strings:str_format",
- "@com_google_googletest//:gtest",
+ gtest,
+ "//test/util:test_main",
+ "//test/util:test_util",
],
)
@@ -3692,12 +3728,12 @@ cc_binary(
deps = [
"//test/util:file_descriptor",
"//test/util:fs_util",
+ gtest,
"//test/util:memory_util",
"//test/util:multiprocess_util",
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_googletest//:gtest",
],
)
@@ -3709,10 +3745,10 @@ cc_binary(
deps = [
":ip_socket_test_util",
"//test/util:file_descriptor",
+ "@com_google_absl//absl/strings",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_absl//absl/strings",
- "@com_google_googletest//:gtest",
],
)
@@ -3724,10 +3760,10 @@ cc_binary(
deps = [
":ip_socket_test_util",
"//test/util:file_descriptor",
+ "@com_google_absl//absl/strings",
+ gtest,
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_absl//absl/strings",
- "@com_google_googletest//:gtest",
],
)
@@ -3743,11 +3779,11 @@ cc_binary(
"//test/util:capability_util",
"//test/util:file_descriptor",
"//test/util:fs_util",
+ "@com_google_absl//absl/strings",
+ gtest,
"//test/util:posix_error",
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
- "@com_google_absl//absl/strings",
- "@com_google_googletest//:gtest",
],
)
diff --git a/test/syscalls/linux/bad.cc b/test/syscalls/linux/bad.cc
index f246a799e..adfb149df 100644
--- a/test/syscalls/linux/bad.cc
+++ b/test/syscalls/linux/bad.cc
@@ -22,11 +22,17 @@ namespace gvisor {
namespace testing {
namespace {
+#ifdef __x86_64__
+// get_kernel_syms is not supported in Linux > 2.6, and not implemented in
+// gVisor.
+constexpr uint32_t kNotImplementedSyscall = SYS_get_kernel_syms;
+#elif __aarch64__
+// Use the last of arch_specific_syscalls which are not implemented on arm64.
+constexpr uint32_t kNotImplementedSyscall = SYS_arch_specific_syscall + 15;
+#endif
TEST(BadSyscallTest, NotImplemented) {
- // get_kernel_syms is not supported in Linux > 2.6, and not implemented in
- // gVisor.
- EXPECT_THAT(syscall(SYS_get_kernel_syms), SyscallFailsWithErrno(ENOSYS));
+ EXPECT_THAT(syscall(kNotImplementedSyscall), SyscallFailsWithErrno(ENOSYS));
}
TEST(BadSyscallTest, NegativeOne) {
diff --git a/test/syscalls/linux/chroot.cc b/test/syscalls/linux/chroot.cc
index 04bc2d7b9..0a2d44a2c 100644
--- a/test/syscalls/linux/chroot.cc
+++ b/test/syscalls/linux/chroot.cc
@@ -162,7 +162,7 @@ TEST(ChrootTest, DotDotFromOpenFD) {
// getdents on fd should not error.
char buf[1024];
- ASSERT_THAT(syscall(SYS_getdents, fd.get(), buf, sizeof(buf)),
+ ASSERT_THAT(syscall(SYS_getdents64, fd.get(), buf, sizeof(buf)),
SyscallSucceeds());
}
diff --git a/test/syscalls/linux/fork.cc b/test/syscalls/linux/fork.cc
index 371890110..906f3358d 100644
--- a/test/syscalls/linux/fork.cc
+++ b/test/syscalls/linux/fork.cc
@@ -215,6 +215,8 @@ TEST_F(ForkTest, PrivateMapping) {
EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0));
}
+// CPUID is x86 specific.
+#ifdef __x86_64__
// Test that cpuid works after a fork.
TEST_F(ForkTest, Cpuid) {
pid_t child = Fork();
@@ -227,6 +229,7 @@ TEST_F(ForkTest, Cpuid) {
}
EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0));
}
+#endif
TEST_F(ForkTest, Mmap) {
pid_t child = Fork();
diff --git a/test/syscalls/linux/getdents.cc b/test/syscalls/linux/getdents.cc
index ad2dbacb8..b147d6181 100644
--- a/test/syscalls/linux/getdents.cc
+++ b/test/syscalls/linux/getdents.cc
@@ -228,19 +228,28 @@ class GetdentsTest : public ::testing::Test {
// Multiple template parameters are not allowed, so we must use explicit
// template specialization to set the syscall number.
+
+// SYS_getdents isn't defined on arm64.
+#ifdef __x86_64__
template <>
int GetdentsTest<struct linux_dirent>::SyscallNum() {
return SYS_getdents;
}
+#endif
template <>
int GetdentsTest<struct linux_dirent64>::SyscallNum() {
return SYS_getdents64;
}
-// Test both legacy getdents and getdents64.
+#ifdef __x86_64__
+// Test both legacy getdents and getdents64 on x86_64.
typedef ::testing::Types<struct linux_dirent, struct linux_dirent64>
GetdentsTypes;
+#elif __aarch64__
+// Test only getdents64 on arm64.
+typedef ::testing::Types<struct linux_dirent64> GetdentsTypes;
+#endif
TYPED_TEST_SUITE(GetdentsTest, GetdentsTypes);
// N.B. TYPED_TESTs require explicitly using this-> to access members of
diff --git a/test/syscalls/linux/ip_socket_test_util.cc b/test/syscalls/linux/ip_socket_test_util.cc
index 6b472eb2f..bba022a41 100644
--- a/test/syscalls/linux/ip_socket_test_util.cc
+++ b/test/syscalls/linux/ip_socket_test_util.cc
@@ -79,6 +79,33 @@ SocketPairKind DualStackTCPAcceptBindSocketPair(int type) {
/* dual_stack = */ true)};
}
+SocketPairKind IPv6TCPAcceptBindPersistentListenerSocketPair(int type) {
+ std::string description =
+ absl::StrCat(DescribeSocketType(type), "connected IPv6 TCP socket");
+ return SocketPairKind{description, AF_INET6, type | SOCK_STREAM, IPPROTO_TCP,
+ TCPAcceptBindPersistentListenerSocketPairCreator(
+ AF_INET6, type | SOCK_STREAM, 0,
+ /* dual_stack = */ false)};
+}
+
+SocketPairKind IPv4TCPAcceptBindPersistentListenerSocketPair(int type) {
+ std::string description =
+ absl::StrCat(DescribeSocketType(type), "connected IPv4 TCP socket");
+ return SocketPairKind{description, AF_INET, type | SOCK_STREAM, IPPROTO_TCP,
+ TCPAcceptBindPersistentListenerSocketPairCreator(
+ AF_INET, type | SOCK_STREAM, 0,
+ /* dual_stack = */ false)};
+}
+
+SocketPairKind DualStackTCPAcceptBindPersistentListenerSocketPair(int type) {
+ std::string description =
+ absl::StrCat(DescribeSocketType(type), "connected dual stack TCP socket");
+ return SocketPairKind{description, AF_INET6, type | SOCK_STREAM, IPPROTO_TCP,
+ TCPAcceptBindPersistentListenerSocketPairCreator(
+ AF_INET6, type | SOCK_STREAM, 0,
+ /* dual_stack = */ true)};
+}
+
SocketPairKind IPv6UDPBidirectionalBindSocketPair(int type) {
std::string description =
absl::StrCat(DescribeSocketType(type), "connected IPv6 UDP socket");
diff --git a/test/syscalls/linux/ip_socket_test_util.h b/test/syscalls/linux/ip_socket_test_util.h
index 0f58e0f77..083ebbcf0 100644
--- a/test/syscalls/linux/ip_socket_test_util.h
+++ b/test/syscalls/linux/ip_socket_test_util.h
@@ -50,6 +50,21 @@ SocketPairKind IPv4TCPAcceptBindSocketPair(int type);
// given type bound to the IPv4 loopback.
SocketPairKind DualStackTCPAcceptBindSocketPair(int type);
+// IPv6TCPAcceptBindPersistentListenerSocketPair is like
+// IPv6TCPAcceptBindSocketPair except it uses a persistent listening socket to
+// create all socket pairs.
+SocketPairKind IPv6TCPAcceptBindPersistentListenerSocketPair(int type);
+
+// IPv4TCPAcceptBindPersistentListenerSocketPair is like
+// IPv4TCPAcceptBindSocketPair except it uses a persistent listening socket to
+// create all socket pairs.
+SocketPairKind IPv4TCPAcceptBindPersistentListenerSocketPair(int type);
+
+// DualStackTCPAcceptBindPersistentListenerSocketPair is like
+// DualStackTCPAcceptBindSocketPair except it uses a persistent listening socket
+// to create all socket pairs.
+SocketPairKind DualStackTCPAcceptBindPersistentListenerSocketPair(int type);
+
// IPv6UDPBidirectionalBindSocketPair returns a SocketPairKind that represents
// SocketPairs created with bind() and connect() syscalls with AF_INET6 and the
// given type bound to the IPv6 loopback.
diff --git a/test/syscalls/linux/preadv2.cc b/test/syscalls/linux/preadv2.cc
index cd936ea90..4a9acd7ae 100644
--- a/test/syscalls/linux/preadv2.cc
+++ b/test/syscalls/linux/preadv2.cc
@@ -35,6 +35,8 @@ namespace {
#ifndef SYS_preadv2
#if defined(__x86_64__)
#define SYS_preadv2 327
+#elif defined(__aarch64__)
+#define SYS_preadv2 286
#else
#error "Unknown architecture"
#endif
diff --git a/test/syscalls/linux/proc.cc b/test/syscalls/linux/proc.cc
index a03c1e43d..169b723eb 100644
--- a/test/syscalls/linux/proc.cc
+++ b/test/syscalls/linux/proc.cc
@@ -1994,7 +1994,7 @@ TEST(Proc, GetdentsEnoent) {
},
nullptr, nullptr));
char buf[1024];
- ASSERT_THAT(syscall(SYS_getdents, fd.get(), buf, sizeof(buf)),
+ ASSERT_THAT(syscall(SYS_getdents64, fd.get(), buf, sizeof(buf)),
SyscallFailsWithErrno(ENOENT));
}
diff --git a/test/syscalls/linux/pwritev2.cc b/test/syscalls/linux/pwritev2.cc
index 1dbc0d6df..3fe5a600f 100644
--- a/test/syscalls/linux/pwritev2.cc
+++ b/test/syscalls/linux/pwritev2.cc
@@ -34,6 +34,8 @@ namespace {
#ifndef SYS_pwritev2
#if defined(__x86_64__)
#define SYS_pwritev2 328
+#elif defined(__aarch64__)
+#define SYS_pwritev2 287
#else
#error "Unknown architecture"
#endif
diff --git a/test/syscalls/linux/seccomp.cc b/test/syscalls/linux/seccomp.cc
index 294ee6808..2c947feb7 100644
--- a/test/syscalls/linux/seccomp.cc
+++ b/test/syscalls/linux/seccomp.cc
@@ -49,7 +49,12 @@ namespace testing {
namespace {
// A syscall not implemented by Linux that we don't expect to be called.
+#ifdef __x86_64__
constexpr uint32_t kFilteredSyscall = SYS_vserver;
+#elif __aarch64__
+// Use the last of arch_specific_syscalls which are not implemented on arm64.
+constexpr uint32_t kFilteredSyscall = SYS_arch_specific_syscall + 15;
+#endif
// Applies a seccomp-bpf filter that returns `filtered_result` for
// `sysno` and allows all other syscalls. Async-signal-safe.
diff --git a/test/syscalls/linux/socket_generic_stress.cc b/test/syscalls/linux/socket_generic_stress.cc
new file mode 100644
index 000000000..6a232238d
--- /dev/null
+++ b/test/syscalls/linux/socket_generic_stress.cc
@@ -0,0 +1,83 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Test fixture for tests that apply to pairs of connected sockets.
+using ConnectStressTest = SocketPairTest;
+
+TEST_P(ConnectStressTest, Reset65kTimes) {
+ for (int i = 0; i < 1 << 16; ++i) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ // Send some data to ensure that the connection gets reset and the port gets
+ // released immediately. This avoids either end entering TIME-WAIT.
+ char sent_data[100] = {};
+ ASSERT_THAT(write(sockets->first_fd(), sent_data, sizeof(sent_data)),
+ SyscallSucceedsWithValue(sizeof(sent_data)));
+ }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ AllConnectedSockets, ConnectStressTest,
+ ::testing::Values(IPv6UDPBidirectionalBindSocketPair(0),
+ IPv4UDPBidirectionalBindSocketPair(0),
+ DualStackUDPBidirectionalBindSocketPair(0),
+
+ // Without REUSEADDR, we get port exhaustion on Linux.
+ SetSockOpt(SOL_SOCKET, SO_REUSEADDR,
+ &kSockOptOn)(IPv6TCPAcceptBindSocketPair(0)),
+ SetSockOpt(SOL_SOCKET, SO_REUSEADDR,
+ &kSockOptOn)(IPv4TCPAcceptBindSocketPair(0)),
+ SetSockOpt(SOL_SOCKET, SO_REUSEADDR, &kSockOptOn)(
+ DualStackTCPAcceptBindSocketPair(0))));
+
+// Test fixture for tests that apply to pairs of connected sockets created with
+// a persistent listener (if applicable).
+using PersistentListenerConnectStressTest = SocketPairTest;
+
+TEST_P(PersistentListenerConnectStressTest, 65kTimes) {
+ for (int i = 0; i < 1 << 16; ++i) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ AllConnectedSockets, PersistentListenerConnectStressTest,
+ ::testing::Values(
+ IPv6UDPBidirectionalBindSocketPair(0),
+ IPv4UDPBidirectionalBindSocketPair(0),
+ DualStackUDPBidirectionalBindSocketPair(0),
+
+ // Without REUSEADDR, we get port exhaustion on Linux.
+ SetSockOpt(SOL_SOCKET, SO_REUSEADDR, &kSockOptOn)(
+ IPv6TCPAcceptBindPersistentListenerSocketPair(0)),
+ SetSockOpt(SOL_SOCKET, SO_REUSEADDR, &kSockOptOn)(
+ IPv4TCPAcceptBindPersistentListenerSocketPair(0)),
+ SetSockOpt(SOL_SOCKET, SO_REUSEADDR, &kSockOptOn)(
+ DualStackTCPAcceptBindPersistentListenerSocketPair(0))));
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc
index 3bf7081b9..b24618a88 100644
--- a/test/syscalls/linux/socket_inet_loopback.cc
+++ b/test/syscalls/linux/socket_inet_loopback.cc
@@ -325,6 +325,12 @@ TEST_P(SocketInetLoopbackTest, TCPListenClose) {
TestAddress const& listener = param.listener;
TestAddress const& connector = param.connector;
+ constexpr int kAcceptCount = 32;
+ constexpr int kBacklog = kAcceptCount * 2;
+ constexpr int kFDs = 128;
+ constexpr int kThreadCount = 4;
+ constexpr int kFDsPerThread = kFDs / kThreadCount;
+
// Create the listening socket.
FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE(
Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP));
@@ -332,7 +338,7 @@ TEST_P(SocketInetLoopbackTest, TCPListenClose) {
ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr),
listener.addr_len),
SyscallSucceeds());
- ASSERT_THAT(listen(listen_fd.get(), 1001), SyscallSucceeds());
+ ASSERT_THAT(listen(listen_fd.get(), kBacklog), SyscallSucceeds());
// Get the port bound by the listening socket.
socklen_t addrlen = listener.addr_len;
@@ -345,9 +351,6 @@ TEST_P(SocketInetLoopbackTest, TCPListenClose) {
DisableSave ds; // Too many system calls.
sockaddr_storage conn_addr = connector.addr;
ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port));
- constexpr int kFDs = 2048;
- constexpr int kThreadCount = 4;
- constexpr int kFDsPerThread = kFDs / kThreadCount;
FileDescriptor clients[kFDs];
std::unique_ptr<ScopedThread> threads[kThreadCount];
for (int i = 0; i < kFDs; i++) {
@@ -371,7 +374,7 @@ TEST_P(SocketInetLoopbackTest, TCPListenClose) {
for (int i = 0; i < kThreadCount; i++) {
threads[i]->Join();
}
- for (int i = 0; i < 32; i++) {
+ for (int i = 0; i < kAcceptCount; i++) {
auto accepted =
ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr));
}
diff --git a/test/syscalls/linux/socket_netlink_route.cc b/test/syscalls/linux/socket_netlink_route.cc
index 1e28e658d..e5aed1eec 100644
--- a/test/syscalls/linux/socket_netlink_route.cc
+++ b/test/syscalls/linux/socket_netlink_route.cc
@@ -14,6 +14,7 @@
#include <arpa/inet.h>
#include <ifaddrs.h>
+#include <linux/if.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <sys/socket.h>
@@ -25,8 +26,10 @@
#include "gtest/gtest.h"
#include "absl/strings/str_format.h"
+#include "absl/types/optional.h"
#include "test/syscalls/linux/socket_netlink_util.h"
#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/capability_util.h"
#include "test/util/cleanup.h"
#include "test/util/file_descriptor.h"
#include "test/util/test_util.h"
@@ -38,6 +41,8 @@ namespace testing {
namespace {
+constexpr uint32_t kSeq = 12345;
+
using ::testing::AnyOf;
using ::testing::Eq;
@@ -113,58 +118,224 @@ void CheckGetLinkResponse(const struct nlmsghdr* hdr, int seq, int port) {
// TODO(mpratt): Check ifinfomsg contents and following attrs.
}
+PosixError DumpLinks(
+ const FileDescriptor& fd, uint32_t seq,
+ const std::function<void(const struct nlmsghdr* hdr)>& fn) {
+ struct request {
+ struct nlmsghdr hdr;
+ struct ifinfomsg ifm;
+ };
+
+ struct request req = {};
+ req.hdr.nlmsg_len = sizeof(req);
+ req.hdr.nlmsg_type = RTM_GETLINK;
+ req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+ req.hdr.nlmsg_seq = seq;
+ req.ifm.ifi_family = AF_UNSPEC;
+
+ return NetlinkRequestResponse(fd, &req, sizeof(req), fn, false);
+}
+
TEST(NetlinkRouteTest, GetLinkDump) {
FileDescriptor fd =
ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE));
uint32_t port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get()));
+ // Loopback is common among all tests, check that it's found.
+ bool loopbackFound = false;
+ ASSERT_NO_ERRNO(DumpLinks(fd, kSeq, [&](const struct nlmsghdr* hdr) {
+ CheckGetLinkResponse(hdr, kSeq, port);
+ if (hdr->nlmsg_type != RTM_NEWLINK) {
+ return;
+ }
+ ASSERT_GE(hdr->nlmsg_len, NLMSG_SPACE(sizeof(struct ifinfomsg)));
+ const struct ifinfomsg* msg =
+ reinterpret_cast<const struct ifinfomsg*>(NLMSG_DATA(hdr));
+ std::cout << "Found interface idx=" << msg->ifi_index
+ << ", type=" << std::hex << msg->ifi_type;
+ if (msg->ifi_type == ARPHRD_LOOPBACK) {
+ loopbackFound = true;
+ EXPECT_NE(msg->ifi_flags & IFF_LOOPBACK, 0);
+ }
+ }));
+ EXPECT_TRUE(loopbackFound);
+}
+
+struct Link {
+ int index;
+ std::string name;
+};
+
+PosixErrorOr<absl::optional<Link>> FindLoopbackLink() {
+ ASSIGN_OR_RETURN_ERRNO(FileDescriptor fd, NetlinkBoundSocket(NETLINK_ROUTE));
+
+ absl::optional<Link> link;
+ RETURN_IF_ERRNO(DumpLinks(fd, kSeq, [&](const struct nlmsghdr* hdr) {
+ if (hdr->nlmsg_type != RTM_NEWLINK ||
+ hdr->nlmsg_len < NLMSG_SPACE(sizeof(struct ifinfomsg))) {
+ return;
+ }
+ const struct ifinfomsg* msg =
+ reinterpret_cast<const struct ifinfomsg*>(NLMSG_DATA(hdr));
+ if (msg->ifi_type == ARPHRD_LOOPBACK) {
+ const auto* rta = FindRtAttr(hdr, msg, IFLA_IFNAME);
+ if (rta == nullptr) {
+ // Ignore links that do not have a name.
+ return;
+ }
+
+ link = Link();
+ link->index = msg->ifi_index;
+ link->name = std::string(reinterpret_cast<const char*>(RTA_DATA(rta)));
+ }
+ }));
+ return link;
+}
+
+// CheckLinkMsg checks a netlink message against an expected link.
+void CheckLinkMsg(const struct nlmsghdr* hdr, const Link& link) {
+ ASSERT_THAT(hdr->nlmsg_type, Eq(RTM_NEWLINK));
+ ASSERT_GE(hdr->nlmsg_len, NLMSG_SPACE(sizeof(struct ifinfomsg)));
+ const struct ifinfomsg* msg =
+ reinterpret_cast<const struct ifinfomsg*>(NLMSG_DATA(hdr));
+ EXPECT_EQ(msg->ifi_index, link.index);
+
+ const struct rtattr* rta = FindRtAttr(hdr, msg, IFLA_IFNAME);
+ EXPECT_NE(nullptr, rta) << "IFLA_IFNAME not found in message.";
+ if (rta != nullptr) {
+ std::string name(reinterpret_cast<const char*>(RTA_DATA(rta)));
+ EXPECT_EQ(name, link.name);
+ }
+}
+
+TEST(NetlinkRouteTest, GetLinkByIndex) {
+ absl::optional<Link> loopback_link =
+ ASSERT_NO_ERRNO_AND_VALUE(FindLoopbackLink());
+ ASSERT_TRUE(loopback_link.has_value());
+
+ FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE));
+
struct request {
struct nlmsghdr hdr;
struct ifinfomsg ifm;
};
- constexpr uint32_t kSeq = 12345;
-
struct request req = {};
req.hdr.nlmsg_len = sizeof(req);
req.hdr.nlmsg_type = RTM_GETLINK;
- req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+ req.hdr.nlmsg_flags = NLM_F_REQUEST;
req.hdr.nlmsg_seq = kSeq;
req.ifm.ifi_family = AF_UNSPEC;
+ req.ifm.ifi_index = loopback_link->index;
- // Loopback is common among all tests, check that it's found.
- bool loopbackFound = false;
+ bool found = false;
ASSERT_NO_ERRNO(NetlinkRequestResponse(
fd, &req, sizeof(req),
[&](const struct nlmsghdr* hdr) {
- CheckGetLinkResponse(hdr, kSeq, port);
- if (hdr->nlmsg_type != RTM_NEWLINK) {
- return;
- }
- ASSERT_GE(hdr->nlmsg_len, NLMSG_SPACE(sizeof(struct ifinfomsg)));
- const struct ifinfomsg* msg =
- reinterpret_cast<const struct ifinfomsg*>(NLMSG_DATA(hdr));
- std::cout << "Found interface idx=" << msg->ifi_index
- << ", type=" << std::hex << msg->ifi_type;
- if (msg->ifi_type == ARPHRD_LOOPBACK) {
- loopbackFound = true;
- EXPECT_NE(msg->ifi_flags & IFF_LOOPBACK, 0);
- }
+ CheckLinkMsg(hdr, *loopback_link);
+ found = true;
},
false));
- EXPECT_TRUE(loopbackFound);
+ EXPECT_TRUE(found) << "Netlink response does not contain any links.";
}
-TEST(NetlinkRouteTest, MsgHdrMsgUnsuppType) {
+TEST(NetlinkRouteTest, GetLinkByName) {
+ absl::optional<Link> loopback_link =
+ ASSERT_NO_ERRNO_AND_VALUE(FindLoopbackLink());
+ ASSERT_TRUE(loopback_link.has_value());
+
FileDescriptor fd =
ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE));
struct request {
struct nlmsghdr hdr;
struct ifinfomsg ifm;
+ struct rtattr rtattr;
+ char ifname[IFNAMSIZ];
+ char pad[NLMSG_ALIGNTO + RTA_ALIGNTO];
};
- constexpr uint32_t kSeq = 12345;
+ struct request req = {};
+ req.hdr.nlmsg_type = RTM_GETLINK;
+ req.hdr.nlmsg_flags = NLM_F_REQUEST;
+ req.hdr.nlmsg_seq = kSeq;
+ req.ifm.ifi_family = AF_UNSPEC;
+ req.rtattr.rta_type = IFLA_IFNAME;
+ req.rtattr.rta_len = RTA_LENGTH(loopback_link->name.size() + 1);
+ strncpy(req.ifname, loopback_link->name.c_str(), sizeof(req.ifname));
+ req.hdr.nlmsg_len =
+ NLMSG_LENGTH(sizeof(req.ifm)) + NLMSG_ALIGN(req.rtattr.rta_len);
+
+ bool found = false;
+ ASSERT_NO_ERRNO(NetlinkRequestResponse(
+ fd, &req, sizeof(req),
+ [&](const struct nlmsghdr* hdr) {
+ CheckLinkMsg(hdr, *loopback_link);
+ found = true;
+ },
+ false));
+ EXPECT_TRUE(found) << "Netlink response does not contain any links.";
+}
+
+TEST(NetlinkRouteTest, GetLinkByIndexNotFound) {
+ FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE));
+
+ struct request {
+ struct nlmsghdr hdr;
+ struct ifinfomsg ifm;
+ };
+
+ struct request req = {};
+ req.hdr.nlmsg_len = sizeof(req);
+ req.hdr.nlmsg_type = RTM_GETLINK;
+ req.hdr.nlmsg_flags = NLM_F_REQUEST;
+ req.hdr.nlmsg_seq = kSeq;
+ req.ifm.ifi_family = AF_UNSPEC;
+ req.ifm.ifi_index = 1234590;
+
+ EXPECT_THAT(NetlinkRequestAckOrError(fd, kSeq, &req, sizeof(req)),
+ PosixErrorIs(ENODEV, ::testing::_));
+}
+
+TEST(NetlinkRouteTest, GetLinkByNameNotFound) {
+ const std::string name = "nodevice?!";
+
+ FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE));
+
+ struct request {
+ struct nlmsghdr hdr;
+ struct ifinfomsg ifm;
+ struct rtattr rtattr;
+ char ifname[IFNAMSIZ];
+ char pad[NLMSG_ALIGNTO + RTA_ALIGNTO];
+ };
+
+ struct request req = {};
+ req.hdr.nlmsg_type = RTM_GETLINK;
+ req.hdr.nlmsg_flags = NLM_F_REQUEST;
+ req.hdr.nlmsg_seq = kSeq;
+ req.ifm.ifi_family = AF_UNSPEC;
+ req.rtattr.rta_type = IFLA_IFNAME;
+ req.rtattr.rta_len = RTA_LENGTH(name.size() + 1);
+ strncpy(req.ifname, name.c_str(), sizeof(req.ifname));
+ req.hdr.nlmsg_len =
+ NLMSG_LENGTH(sizeof(req.ifm)) + NLMSG_ALIGN(req.rtattr.rta_len);
+
+ EXPECT_THAT(NetlinkRequestAckOrError(fd, kSeq, &req, sizeof(req)),
+ PosixErrorIs(ENODEV, ::testing::_));
+}
+
+TEST(NetlinkRouteTest, MsgHdrMsgUnsuppType) {
+ FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE));
+
+ struct request {
+ struct nlmsghdr hdr;
+ struct ifinfomsg ifm;
+ };
struct request req = {};
req.hdr.nlmsg_len = sizeof(req);
@@ -175,18 +346,8 @@ TEST(NetlinkRouteTest, MsgHdrMsgUnsuppType) {
req.hdr.nlmsg_seq = kSeq;
req.ifm.ifi_family = AF_UNSPEC;
- ASSERT_NO_ERRNO(NetlinkRequestResponse(
- fd, &req, sizeof(req),
- [&](const struct nlmsghdr* hdr) {
- EXPECT_THAT(hdr->nlmsg_type, Eq(NLMSG_ERROR));
- EXPECT_EQ(hdr->nlmsg_seq, kSeq);
- EXPECT_GE(hdr->nlmsg_len, sizeof(*hdr) + sizeof(struct nlmsgerr));
-
- const struct nlmsgerr* msg =
- reinterpret_cast<const struct nlmsgerr*>(NLMSG_DATA(hdr));
- EXPECT_EQ(msg->error, -EOPNOTSUPP);
- },
- true));
+ EXPECT_THAT(NetlinkRequestAckOrError(fd, kSeq, &req, sizeof(req)),
+ PosixErrorIs(EOPNOTSUPP, ::testing::_));
}
TEST(NetlinkRouteTest, MsgHdrMsgTrunc) {
@@ -198,8 +359,6 @@ TEST(NetlinkRouteTest, MsgHdrMsgTrunc) {
struct ifinfomsg ifm;
};
- constexpr uint32_t kSeq = 12345;
-
struct request req = {};
req.hdr.nlmsg_len = sizeof(req);
req.hdr.nlmsg_type = RTM_GETLINK;
@@ -238,8 +397,6 @@ TEST(NetlinkRouteTest, MsgTruncMsgHdrMsgTrunc) {
struct ifinfomsg ifm;
};
- constexpr uint32_t kSeq = 12345;
-
struct request req = {};
req.hdr.nlmsg_len = sizeof(req);
req.hdr.nlmsg_type = RTM_GETLINK;
@@ -282,8 +439,6 @@ TEST(NetlinkRouteTest, ControlMessageIgnored) {
struct ifinfomsg ifm;
};
- constexpr uint32_t kSeq = 12345;
-
struct request req = {};
// This control message is ignored. We still receive a response for the
@@ -317,8 +472,6 @@ TEST(NetlinkRouteTest, GetAddrDump) {
struct rtgenmsg rgm;
};
- constexpr uint32_t kSeq = 12345;
-
struct request req;
req.hdr.nlmsg_len = sizeof(req);
req.hdr.nlmsg_type = RTM_GETADDR;
@@ -367,6 +520,57 @@ TEST(NetlinkRouteTest, LookupAll) {
ASSERT_GT(count, 0);
}
+TEST(NetlinkRouteTest, AddAddr) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));
+
+ absl::optional<Link> loopback_link =
+ ASSERT_NO_ERRNO_AND_VALUE(FindLoopbackLink());
+ ASSERT_TRUE(loopback_link.has_value());
+
+ FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE));
+
+ struct request {
+ struct nlmsghdr hdr;
+ struct ifaddrmsg ifa;
+ struct rtattr rtattr;
+ struct in_addr addr;
+ char pad[NLMSG_ALIGNTO + RTA_ALIGNTO];
+ };
+
+ struct request req = {};
+ req.hdr.nlmsg_type = RTM_NEWADDR;
+ req.hdr.nlmsg_seq = kSeq;
+ req.ifa.ifa_family = AF_INET;
+ req.ifa.ifa_prefixlen = 24;
+ req.ifa.ifa_flags = 0;
+ req.ifa.ifa_scope = 0;
+ req.ifa.ifa_index = loopback_link->index;
+ req.rtattr.rta_type = IFA_LOCAL;
+ req.rtattr.rta_len = RTA_LENGTH(sizeof(req.addr));
+ inet_pton(AF_INET, "10.0.0.1", &req.addr);
+ req.hdr.nlmsg_len =
+ NLMSG_LENGTH(sizeof(req.ifa)) + NLMSG_ALIGN(req.rtattr.rta_len);
+
+ // Create should succeed, as no such address in kernel.
+ req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_ACK;
+ EXPECT_NO_ERRNO(
+ NetlinkRequestAckOrError(fd, req.hdr.nlmsg_seq, &req, req.hdr.nlmsg_len));
+
+ // Replace an existing address should succeed.
+ req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_REPLACE | NLM_F_ACK;
+ req.hdr.nlmsg_seq++;
+ EXPECT_NO_ERRNO(
+ NetlinkRequestAckOrError(fd, req.hdr.nlmsg_seq, &req, req.hdr.nlmsg_len));
+
+ // Create exclusive should fail, as we created the address above.
+ req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
+ req.hdr.nlmsg_seq++;
+ EXPECT_THAT(
+ NetlinkRequestAckOrError(fd, req.hdr.nlmsg_seq, &req, req.hdr.nlmsg_len),
+ PosixErrorIs(EEXIST, ::testing::_));
+}
+
// GetRouteDump tests a RTM_GETROUTE + NLM_F_DUMP request.
TEST(NetlinkRouteTest, GetRouteDump) {
FileDescriptor fd =
@@ -378,8 +582,6 @@ TEST(NetlinkRouteTest, GetRouteDump) {
struct rtmsg rtm;
};
- constexpr uint32_t kSeq = 12345;
-
struct request req = {};
req.hdr.nlmsg_len = sizeof(req);
req.hdr.nlmsg_type = RTM_GETROUTE;
@@ -538,8 +740,6 @@ TEST(NetlinkRouteTest, RecvmsgTrunc) {
struct rtgenmsg rgm;
};
- constexpr uint32_t kSeq = 12345;
-
struct request req;
req.hdr.nlmsg_len = sizeof(req);
req.hdr.nlmsg_type = RTM_GETADDR;
@@ -615,8 +815,6 @@ TEST(NetlinkRouteTest, RecvmsgTruncPeek) {
struct rtgenmsg rgm;
};
- constexpr uint32_t kSeq = 12345;
-
struct request req;
req.hdr.nlmsg_len = sizeof(req);
req.hdr.nlmsg_type = RTM_GETADDR;
@@ -695,8 +893,6 @@ TEST(NetlinkRouteTest, NoPasscredNoCreds) {
struct rtgenmsg rgm;
};
- constexpr uint32_t kSeq = 12345;
-
struct request req;
req.hdr.nlmsg_len = sizeof(req);
req.hdr.nlmsg_type = RTM_GETADDR;
@@ -743,8 +939,6 @@ TEST(NetlinkRouteTest, PasscredCreds) {
struct rtgenmsg rgm;
};
- constexpr uint32_t kSeq = 12345;
-
struct request req;
req.hdr.nlmsg_len = sizeof(req);
req.hdr.nlmsg_type = RTM_GETADDR;
diff --git a/test/syscalls/linux/socket_netlink_util.cc b/test/syscalls/linux/socket_netlink_util.cc
index cd2212a1a..952eecfe8 100644
--- a/test/syscalls/linux/socket_netlink_util.cc
+++ b/test/syscalls/linux/socket_netlink_util.cc
@@ -16,6 +16,7 @@
#include <linux/if_arp.h>
#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
#include <sys/socket.h>
#include <vector>
@@ -71,9 +72,10 @@ PosixError NetlinkRequestResponse(
iov.iov_base = buf.data();
iov.iov_len = buf.size();
- // Response is a series of NLM_F_MULTI messages, ending with a NLMSG_DONE
- // message.
+ // If NLM_F_MULTI is set, response is a series of messages that ends with a
+ // NLMSG_DONE message.
int type = -1;
+ int flags = 0;
do {
int len;
RETURN_ERROR_IF_SYSCALL_FAIL(len = RetryEINTR(recvmsg)(fd.get(), &msg, 0));
@@ -89,6 +91,7 @@ PosixError NetlinkRequestResponse(
for (struct nlmsghdr* hdr = reinterpret_cast<struct nlmsghdr*>(buf.data());
NLMSG_OK(hdr, len); hdr = NLMSG_NEXT(hdr, len)) {
fn(hdr);
+ flags = hdr->nlmsg_flags;
type = hdr->nlmsg_type;
// Done should include an integer payload for dump_done_errno.
// See net/netlink/af_netlink.c:netlink_dump
@@ -98,11 +101,11 @@ PosixError NetlinkRequestResponse(
EXPECT_GE(hdr->nlmsg_len, NLMSG_LENGTH(sizeof(int)));
}
}
- } while (type != NLMSG_DONE && type != NLMSG_ERROR);
+ } while ((flags & NLM_F_MULTI) && type != NLMSG_DONE && type != NLMSG_ERROR);
if (expect_nlmsgerr) {
EXPECT_EQ(type, NLMSG_ERROR);
- } else {
+ } else if (flags & NLM_F_MULTI) {
EXPECT_EQ(type, NLMSG_DONE);
}
return NoError();
@@ -146,5 +149,39 @@ PosixError NetlinkRequestResponseSingle(
return NoError();
}
+PosixError NetlinkRequestAckOrError(const FileDescriptor& fd, uint32_t seq,
+ void* request, size_t len) {
+ // Dummy negative number for no error message received.
+ // We won't get a negative error number so there will be no confusion.
+ int err = -42;
+ RETURN_IF_ERRNO(NetlinkRequestResponse(
+ fd, request, len,
+ [&](const struct nlmsghdr* hdr) {
+ EXPECT_EQ(NLMSG_ERROR, hdr->nlmsg_type);
+ EXPECT_EQ(hdr->nlmsg_seq, seq);
+ EXPECT_GE(hdr->nlmsg_len, sizeof(*hdr) + sizeof(struct nlmsgerr));
+
+ const struct nlmsgerr* msg =
+ reinterpret_cast<const struct nlmsgerr*>(NLMSG_DATA(hdr));
+ err = -msg->error;
+ },
+ true));
+ return PosixError(err);
+}
+
+const struct rtattr* FindRtAttr(const struct nlmsghdr* hdr,
+ const struct ifinfomsg* msg, int16_t attr) {
+ const int ifi_space = NLMSG_SPACE(sizeof(*msg));
+ int attrlen = hdr->nlmsg_len - ifi_space;
+ const struct rtattr* rta = reinterpret_cast<const struct rtattr*>(
+ reinterpret_cast<const uint8_t*>(hdr) + NLMSG_ALIGN(ifi_space));
+ for (; RTA_OK(rta, attrlen); rta = RTA_NEXT(rta, attrlen)) {
+ if (rta->rta_type == attr) {
+ return rta;
+ }
+ }
+ return nullptr;
+}
+
} // namespace testing
} // namespace gvisor
diff --git a/test/syscalls/linux/socket_netlink_util.h b/test/syscalls/linux/socket_netlink_util.h
index 3678c0599..e13ead406 100644
--- a/test/syscalls/linux/socket_netlink_util.h
+++ b/test/syscalls/linux/socket_netlink_util.h
@@ -19,6 +19,7 @@
// socket.h has to be included before if_arp.h.
#include <linux/if_arp.h>
#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
#include "test/util/file_descriptor.h"
#include "test/util/posix_error.h"
@@ -47,6 +48,14 @@ PosixError NetlinkRequestResponseSingle(
const FileDescriptor& fd, void* request, size_t len,
const std::function<void(const struct nlmsghdr* hdr)>& fn);
+// Send the passed request then expect and return an ack or error.
+PosixError NetlinkRequestAckOrError(const FileDescriptor& fd, uint32_t seq,
+ void* request, size_t len);
+
+// Find rtnetlink attribute in message.
+const struct rtattr* FindRtAttr(const struct nlmsghdr* hdr,
+ const struct ifinfomsg* msg, int16_t attr);
+
} // namespace testing
} // namespace gvisor
diff --git a/test/syscalls/linux/socket_test_util.cc b/test/syscalls/linux/socket_test_util.cc
index eff7d577e..c0c5ab3fe 100644
--- a/test/syscalls/linux/socket_test_util.cc
+++ b/test/syscalls/linux/socket_test_util.cc
@@ -18,10 +18,13 @@
#include <poll.h>
#include <sys/socket.h>
+#include <memory>
+
#include "gtest/gtest.h"
#include "absl/memory/memory.h"
#include "absl/strings/str_cat.h"
#include "absl/time/clock.h"
+#include "absl/types/optional.h"
#include "test/util/file_descriptor.h"
#include "test/util/posix_error.h"
#include "test/util/temp_path.h"
@@ -109,7 +112,10 @@ Creator<SocketPair> AcceptBindSocketPairCreator(bool abstract, int domain,
MaybeSave(); // Unlinked path.
}
- return absl::make_unique<AddrFDSocketPair>(connected, accepted, bind_addr,
+ // accepted is before connected to destruct connected before accepted.
+ // Destructors for nonstatic member objects are called in the reverse order
+ // in which they appear in the class declaration.
+ return absl::make_unique<AddrFDSocketPair>(accepted, connected, bind_addr,
extra_addr);
};
}
@@ -311,11 +317,16 @@ PosixErrorOr<T> BindIP(int fd, bool dual_stack) {
}
template <typename T>
-PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> CreateTCPAcceptBindSocketPair(
- int bound, int connected, int type, bool dual_stack) {
- ASSIGN_OR_RETURN_ERRNO(T bind_addr, BindIP<T>(bound, dual_stack));
- RETURN_ERROR_IF_SYSCALL_FAIL(listen(bound, /* backlog = */ 5));
+PosixErrorOr<T> TCPBindAndListen(int fd, bool dual_stack) {
+ ASSIGN_OR_RETURN_ERRNO(T addr, BindIP<T>(fd, dual_stack));
+ RETURN_ERROR_IF_SYSCALL_FAIL(listen(fd, /* backlog = */ 5));
+ return addr;
+}
+template <typename T>
+PosixErrorOr<std::unique_ptr<AddrFDSocketPair>>
+CreateTCPConnectAcceptSocketPair(int bound, int connected, int type,
+ bool dual_stack, T bind_addr) {
int connect_result = 0;
RETURN_ERROR_IF_SYSCALL_FAIL(
(connect_result = RetryEINTR(connect)(
@@ -358,16 +369,27 @@ PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> CreateTCPAcceptBindSocketPair(
absl::SleepFor(absl::Seconds(1));
}
- // Cleanup no longer needed resources.
- RETURN_ERROR_IF_SYSCALL_FAIL(close(bound));
- MaybeSave(); // Successful close.
-
T extra_addr = {};
LocalhostAddr(&extra_addr, dual_stack);
return absl::make_unique<AddrFDSocketPair>(connected, accepted, bind_addr,
extra_addr);
}
+template <typename T>
+PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> CreateTCPAcceptBindSocketPair(
+ int bound, int connected, int type, bool dual_stack) {
+ ASSIGN_OR_RETURN_ERRNO(T bind_addr, TCPBindAndListen<T>(bound, dual_stack));
+
+ auto result = CreateTCPConnectAcceptSocketPair(bound, connected, type,
+ dual_stack, bind_addr);
+
+ // Cleanup no longer needed resources.
+ RETURN_ERROR_IF_SYSCALL_FAIL(close(bound));
+ MaybeSave(); // Successful close.
+
+ return result;
+}
+
Creator<SocketPair> TCPAcceptBindSocketPairCreator(int domain, int type,
int protocol,
bool dual_stack) {
@@ -389,6 +411,63 @@ Creator<SocketPair> TCPAcceptBindSocketPairCreator(int domain, int type,
};
}
+Creator<SocketPair> TCPAcceptBindPersistentListenerSocketPairCreator(
+ int domain, int type, int protocol, bool dual_stack) {
+ // These are lazily initialized below, on the first call to the returned
+ // lambda. These values are private to each returned lambda, but shared across
+ // invocations of a specific lambda.
+ //
+ // The sharing allows pairs created with the same parameters to share a
+ // listener. This prevents future connects from failing if the connecting
+ // socket selects a port which had previously been used by a listening socket
+ // that still has some connections in TIME-WAIT.
+ //
+ // The lazy initialization is to avoid creating sockets during parameter
+ // enumeration. This is important because parameters are enumerated during the
+ // build process where networking may not be available.
+ auto listener = std::make_shared<absl::optional<int>>(absl::optional<int>());
+ auto addr4 = std::make_shared<absl::optional<sockaddr_in>>(
+ absl::optional<sockaddr_in>());
+ auto addr6 = std::make_shared<absl::optional<sockaddr_in6>>(
+ absl::optional<sockaddr_in6>());
+
+ return [=]() -> PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> {
+ int connected;
+ RETURN_ERROR_IF_SYSCALL_FAIL(connected = socket(domain, type, protocol));
+ MaybeSave(); // Successful socket creation.
+
+ // Share the listener across invocations.
+ if (!listener->has_value()) {
+ int fd = socket(domain, type, protocol);
+ if (fd < 0) {
+ return PosixError(errno, absl::StrCat("socket(", domain, ", ", type,
+ ", ", protocol, ")"));
+ }
+ listener->emplace(fd);
+ MaybeSave(); // Successful socket creation.
+ }
+
+ // Bind the listener once, but create a new connect/accept pair each
+ // time.
+ if (domain == AF_INET) {
+ if (!addr4->has_value()) {
+ addr4->emplace(
+ TCPBindAndListen<sockaddr_in>(listener->value(), dual_stack)
+ .ValueOrDie());
+ }
+ return CreateTCPConnectAcceptSocketPair(listener->value(), connected,
+ type, dual_stack, addr4->value());
+ }
+ if (!addr6->has_value()) {
+ addr6->emplace(
+ TCPBindAndListen<sockaddr_in6>(listener->value(), dual_stack)
+ .ValueOrDie());
+ }
+ return CreateTCPConnectAcceptSocketPair(listener->value(), connected, type,
+ dual_stack, addr6->value());
+ };
+}
+
template <typename T>
PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> CreateUDPBoundSocketPair(
int sock1, int sock2, int type, bool dual_stack) {
@@ -518,8 +597,8 @@ size_t CalculateUnixSockAddrLen(const char* sun_path) {
if (sun_path[0] == 0) {
return sizeof(sockaddr_un);
}
- // Filesystem addresses use the address length plus the 2 byte sun_family and
- // null terminator.
+ // Filesystem addresses use the address length plus the 2 byte sun_family
+ // and null terminator.
return strlen(sun_path) + 3;
}
diff --git a/test/syscalls/linux/socket_test_util.h b/test/syscalls/linux/socket_test_util.h
index 2dbb8bed3..bfaa6e397 100644
--- a/test/syscalls/linux/socket_test_util.h
+++ b/test/syscalls/linux/socket_test_util.h
@@ -273,6 +273,12 @@ Creator<SocketPair> TCPAcceptBindSocketPairCreator(int domain, int type,
int protocol,
bool dual_stack);
+// TCPAcceptBindPersistentListenerSocketPairCreator is like
+// TCPAcceptBindSocketPairCreator, except it uses the same listening socket to
+// create all SocketPairs.
+Creator<SocketPair> TCPAcceptBindPersistentListenerSocketPairCreator(
+ int domain, int type, int protocol, bool dual_stack);
+
// UDPBidirectionalBindSocketPairCreator returns a Creator<SocketPair> that
// obtains file descriptors by invoking the bind() and connect() syscalls on UDP
// sockets.
diff --git a/test/syscalls/linux/stat.cc b/test/syscalls/linux/stat.cc
index 388d75835..c951ac3b3 100644
--- a/test/syscalls/linux/stat.cc
+++ b/test/syscalls/linux/stat.cc
@@ -557,6 +557,8 @@ TEST(SimpleStatTest, AnonDeviceAllocatesUniqueInodesAcrossSaveRestore) {
#ifndef SYS_statx
#if defined(__x86_64__)
#define SYS_statx 332
+#elif defined(__aarch64__)
+#define SYS_statx 291
#else
#error "Unknown architecture"
#endif
diff --git a/test/syscalls/linux/timers.cc b/test/syscalls/linux/timers.cc
index 3db18d7ac..2f92c27da 100644
--- a/test/syscalls/linux/timers.cc
+++ b/test/syscalls/linux/timers.cc
@@ -297,9 +297,13 @@ class IntervalTimer {
PosixErrorOr<IntervalTimer> TimerCreate(clockid_t clockid,
const struct sigevent& sev) {
int timerid;
- if (syscall(SYS_timer_create, clockid, &sev, &timerid) < 0) {
+ int ret = syscall(SYS_timer_create, clockid, &sev, &timerid);
+ if (ret < 0) {
return PosixError(errno, "timer_create");
}
+ if (ret > 0) {
+ return PosixError(EINVAL, "timer_create should never return positive");
+ }
MaybeSave();
return IntervalTimer(timerid);
}
@@ -317,6 +321,18 @@ TEST(IntervalTimerTest, IsInitiallyStopped) {
EXPECT_EQ(0, its.it_value.tv_nsec);
}
+// Kernel can create multiple timers without issue.
+//
+// Regression test for gvisor.dev/issue/1738.
+TEST(IntervalTimerTest, MultipleTimers) {
+ struct sigevent sev = {};
+ sev.sigev_notify = SIGEV_NONE;
+ const auto timer1 =
+ ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev));
+ const auto timer2 =
+ ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev));
+}
+
TEST(IntervalTimerTest, SingleShotSilent) {
struct sigevent sev = {};
sev.sigev_notify = SIGEV_NONE;
diff --git a/test/syscalls/linux/xattr.cc b/test/syscalls/linux/xattr.cc
index ab21d68c6..85eb31847 100644
--- a/test/syscalls/linux/xattr.cc
+++ b/test/syscalls/linux/xattr.cc
@@ -39,6 +39,10 @@ namespace {
class XattrTest : public FileTest {};
TEST_F(XattrTest, XattrNullName) {
+ // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are
+ // supported, and get/set have been added pack to the syscall table.
+ SKIP_IF(IsRunningOnGvisor());
+
const char* path = test_file_name_.c_str();
EXPECT_THAT(setxattr(path, nullptr, nullptr, 0, /*flags=*/0),
@@ -48,6 +52,10 @@ TEST_F(XattrTest, XattrNullName) {
}
TEST_F(XattrTest, XattrEmptyName) {
+ // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are
+ // supported, and get/set have been added pack to the syscall table.
+ SKIP_IF(IsRunningOnGvisor());
+
const char* path = test_file_name_.c_str();
EXPECT_THAT(setxattr(path, "", nullptr, 0, /*flags=*/0),
@@ -56,6 +64,10 @@ TEST_F(XattrTest, XattrEmptyName) {
}
TEST_F(XattrTest, XattrLargeName) {
+ // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are
+ // supported, and get/set have been added pack to the syscall table.
+ SKIP_IF(IsRunningOnGvisor());
+
const char* path = test_file_name_.c_str();
std::string name = "user.";
name += std::string(XATTR_NAME_MAX - name.length(), 'a');
@@ -77,6 +89,10 @@ TEST_F(XattrTest, XattrLargeName) {
}
TEST_F(XattrTest, XattrInvalidPrefix) {
+ // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are
+ // supported, and get/set have been added pack to the syscall table.
+ SKIP_IF(IsRunningOnGvisor());
+
const char* path = test_file_name_.c_str();
std::string name(XATTR_NAME_MAX, 'a');
EXPECT_THAT(setxattr(path, name.c_str(), nullptr, 0, /*flags=*/0),
@@ -88,6 +104,10 @@ TEST_F(XattrTest, XattrInvalidPrefix) {
// Do not allow save/restore cycles after making the test file read-only, as
// the restore will fail to open it with r/w permissions.
TEST_F(XattrTest, XattrReadOnly_NoRandomSave) {
+ // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are
+ // supported, and get/set have been added pack to the syscall table.
+ SKIP_IF(IsRunningOnGvisor());
+
// Drop capabilities that allow us to override file and directory permissions.
ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
@@ -113,6 +133,10 @@ TEST_F(XattrTest, XattrReadOnly_NoRandomSave) {
// Do not allow save/restore cycles after making the test file write-only, as
// the restore will fail to open it with r/w permissions.
TEST_F(XattrTest, XattrWriteOnly_NoRandomSave) {
+ // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are
+ // supported, and get/set have been added pack to the syscall table.
+ SKIP_IF(IsRunningOnGvisor());
+
// Drop capabilities that allow us to override file and directory permissions.
ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
@@ -143,6 +167,10 @@ TEST_F(XattrTest, XattrTrustedWithNonadmin) {
}
TEST_F(XattrTest, XattrOnDirectory) {
+ // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are
+ // supported, and get/set have been added pack to the syscall table.
+ SKIP_IF(IsRunningOnGvisor());
+
TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
const char name[] = "user.test";
EXPECT_THAT(setxattr(dir.path().c_str(), name, NULL, 0, /*flags=*/0),
@@ -152,6 +180,10 @@ TEST_F(XattrTest, XattrOnDirectory) {
}
TEST_F(XattrTest, XattrOnSymlink) {
+ // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are
+ // supported, and get/set have been added pack to the syscall table.
+ SKIP_IF(IsRunningOnGvisor());
+
TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
TempPath link = ASSERT_NO_ERRNO_AND_VALUE(
TempPath::CreateSymlinkTo(dir.path(), test_file_name_));
@@ -163,6 +195,10 @@ TEST_F(XattrTest, XattrOnSymlink) {
}
TEST_F(XattrTest, XattrOnInvalidFileTypes) {
+ // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are
+ // supported, and get/set have been added pack to the syscall table.
+ SKIP_IF(IsRunningOnGvisor());
+
const char name[] = "user.test";
char char_device[] = "/dev/zero";
@@ -181,6 +217,10 @@ TEST_F(XattrTest, XattrOnInvalidFileTypes) {
}
TEST_F(XattrTest, SetxattrSizeSmallerThanValue) {
+ // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are
+ // supported, and get/set have been added pack to the syscall table.
+ SKIP_IF(IsRunningOnGvisor());
+
const char* path = test_file_name_.c_str();
const char name[] = "user.test";
std::vector<char> val = {'a', 'a'};
@@ -196,6 +236,10 @@ TEST_F(XattrTest, SetxattrSizeSmallerThanValue) {
}
TEST_F(XattrTest, SetxattrZeroSize) {
+ // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are
+ // supported, and get/set have been added pack to the syscall table.
+ SKIP_IF(IsRunningOnGvisor());
+
const char* path = test_file_name_.c_str();
const char name[] = "user.test";
char val = 'a';
@@ -208,6 +252,10 @@ TEST_F(XattrTest, SetxattrZeroSize) {
}
TEST_F(XattrTest, SetxattrSizeTooLarge) {
+ // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are
+ // supported, and get/set have been added pack to the syscall table.
+ SKIP_IF(IsRunningOnGvisor());
+
const char* path = test_file_name_.c_str();
const char name[] = "user.test";
@@ -223,6 +271,10 @@ TEST_F(XattrTest, SetxattrSizeTooLarge) {
}
TEST_F(XattrTest, SetxattrNullValueAndNonzeroSize) {
+ // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are
+ // supported, and get/set have been added pack to the syscall table.
+ SKIP_IF(IsRunningOnGvisor());
+
const char* path = test_file_name_.c_str();
const char name[] = "user.test";
EXPECT_THAT(setxattr(path, name, nullptr, 1, /*flags=*/0),
@@ -232,6 +284,10 @@ TEST_F(XattrTest, SetxattrNullValueAndNonzeroSize) {
}
TEST_F(XattrTest, SetxattrNullValueAndZeroSize) {
+ // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are
+ // supported, and get/set have been added pack to the syscall table.
+ SKIP_IF(IsRunningOnGvisor());
+
const char* path = test_file_name_.c_str();
const char name[] = "user.test";
EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), SyscallSucceeds());
@@ -240,6 +296,10 @@ TEST_F(XattrTest, SetxattrNullValueAndZeroSize) {
}
TEST_F(XattrTest, SetxattrValueTooLargeButOKSize) {
+ // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are
+ // supported, and get/set have been added pack to the syscall table.
+ SKIP_IF(IsRunningOnGvisor());
+
const char* path = test_file_name_.c_str();
const char name[] = "user.test";
std::vector<char> val(XATTR_SIZE_MAX + 1);
@@ -256,6 +316,10 @@ TEST_F(XattrTest, SetxattrValueTooLargeButOKSize) {
}
TEST_F(XattrTest, SetxattrReplaceWithSmaller) {
+ // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are
+ // supported, and get/set have been added pack to the syscall table.
+ SKIP_IF(IsRunningOnGvisor());
+
const char* path = test_file_name_.c_str();
const char name[] = "user.test";
std::vector<char> val = {'a', 'a'};
@@ -271,6 +335,10 @@ TEST_F(XattrTest, SetxattrReplaceWithSmaller) {
}
TEST_F(XattrTest, SetxattrReplaceWithLarger) {
+ // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are
+ // supported, and get/set have been added pack to the syscall table.
+ SKIP_IF(IsRunningOnGvisor());
+
const char* path = test_file_name_.c_str();
const char name[] = "user.test";
std::vector<char> val = {'a', 'a'};
@@ -285,6 +353,10 @@ TEST_F(XattrTest, SetxattrReplaceWithLarger) {
}
TEST_F(XattrTest, SetxattrCreateFlag) {
+ // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are
+ // supported, and get/set have been added pack to the syscall table.
+ SKIP_IF(IsRunningOnGvisor());
+
const char* path = test_file_name_.c_str();
const char name[] = "user.test";
EXPECT_THAT(setxattr(path, name, nullptr, 0, XATTR_CREATE),
@@ -296,6 +368,10 @@ TEST_F(XattrTest, SetxattrCreateFlag) {
}
TEST_F(XattrTest, SetxattrReplaceFlag) {
+ // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are
+ // supported, and get/set have been added pack to the syscall table.
+ SKIP_IF(IsRunningOnGvisor());
+
const char* path = test_file_name_.c_str();
const char name[] = "user.test";
EXPECT_THAT(setxattr(path, name, nullptr, 0, XATTR_REPLACE),
@@ -308,6 +384,10 @@ TEST_F(XattrTest, SetxattrReplaceFlag) {
}
TEST_F(XattrTest, SetxattrInvalidFlags) {
+ // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are
+ // supported, and get/set have been added pack to the syscall table.
+ SKIP_IF(IsRunningOnGvisor());
+
const char* path = test_file_name_.c_str();
int invalid_flags = 0xff;
EXPECT_THAT(setxattr(path, nullptr, nullptr, 0, invalid_flags),
@@ -315,6 +395,10 @@ TEST_F(XattrTest, SetxattrInvalidFlags) {
}
TEST_F(XattrTest, Getxattr) {
+ // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are
+ // supported, and get/set have been added pack to the syscall table.
+ SKIP_IF(IsRunningOnGvisor());
+
const char* path = test_file_name_.c_str();
const char name[] = "user.test";
int val = 1234;
@@ -327,6 +411,10 @@ TEST_F(XattrTest, Getxattr) {
}
TEST_F(XattrTest, GetxattrSizeSmallerThanValue) {
+ // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are
+ // supported, and get/set have been added pack to the syscall table.
+ SKIP_IF(IsRunningOnGvisor());
+
const char* path = test_file_name_.c_str();
const char name[] = "user.test";
std::vector<char> val = {'a', 'a'};
@@ -339,6 +427,10 @@ TEST_F(XattrTest, GetxattrSizeSmallerThanValue) {
}
TEST_F(XattrTest, GetxattrSizeLargerThanValue) {
+ // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are
+ // supported, and get/set have been added pack to the syscall table.
+ SKIP_IF(IsRunningOnGvisor());
+
const char* path = test_file_name_.c_str();
const char name[] = "user.test";
char val = 'a';
@@ -354,6 +446,10 @@ TEST_F(XattrTest, GetxattrSizeLargerThanValue) {
}
TEST_F(XattrTest, GetxattrZeroSize) {
+ // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are
+ // supported, and get/set have been added pack to the syscall table.
+ SKIP_IF(IsRunningOnGvisor());
+
const char* path = test_file_name_.c_str();
const char name[] = "user.test";
char val = 'a';
@@ -367,6 +463,10 @@ TEST_F(XattrTest, GetxattrZeroSize) {
}
TEST_F(XattrTest, GetxattrSizeTooLarge) {
+ // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are
+ // supported, and get/set have been added pack to the syscall table.
+ SKIP_IF(IsRunningOnGvisor());
+
const char* path = test_file_name_.c_str();
const char name[] = "user.test";
char val = 'a';
@@ -383,6 +483,10 @@ TEST_F(XattrTest, GetxattrSizeTooLarge) {
}
TEST_F(XattrTest, GetxattrNullValue) {
+ // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are
+ // supported, and get/set have been added pack to the syscall table.
+ SKIP_IF(IsRunningOnGvisor());
+
const char* path = test_file_name_.c_str();
const char name[] = "user.test";
char val = 'a';
@@ -394,6 +498,10 @@ TEST_F(XattrTest, GetxattrNullValue) {
}
TEST_F(XattrTest, GetxattrNullValueAndZeroSize) {
+ // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are
+ // supported, and get/set have been added pack to the syscall table.
+ SKIP_IF(IsRunningOnGvisor());
+
const char* path = test_file_name_.c_str();
const char name[] = "user.test";
char val = 'a';
@@ -410,12 +518,20 @@ TEST_F(XattrTest, GetxattrNullValueAndZeroSize) {
}
TEST_F(XattrTest, GetxattrNonexistentName) {
+ // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are
+ // supported, and get/set have been added pack to the syscall table.
+ SKIP_IF(IsRunningOnGvisor());
+
const char* path = test_file_name_.c_str();
const char name[] = "user.test";
EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(ENODATA));
}
TEST_F(XattrTest, LGetSetxattrOnSymlink) {
+ // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are
+ // supported, and get/set have been added pack to the syscall table.
+ SKIP_IF(IsRunningOnGvisor());
+
TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
TempPath link = ASSERT_NO_ERRNO_AND_VALUE(
TempPath::CreateSymlinkTo(dir.path(), test_file_name_));
@@ -427,6 +543,10 @@ TEST_F(XattrTest, LGetSetxattrOnSymlink) {
}
TEST_F(XattrTest, LGetSetxattrOnNonsymlink) {
+ // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are
+ // supported, and get/set have been added pack to the syscall table.
+ SKIP_IF(IsRunningOnGvisor());
+
const char* path = test_file_name_.c_str();
const char name[] = "user.test";
int val = 1234;
@@ -441,6 +561,10 @@ TEST_F(XattrTest, LGetSetxattrOnNonsymlink) {
}
TEST_F(XattrTest, FGetSetxattr) {
+ // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are
+ // supported, and get/set have been added pack to the syscall table.
+ SKIP_IF(IsRunningOnGvisor());
+
const FileDescriptor fd =
ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_.c_str(), 0));
const char name[] = "user.test";
diff --git a/test/syscalls/syscall_test_runner.go b/test/syscalls/syscall_test_runner.go
index b9fd885ff..ae342b68c 100644
--- a/test/syscalls/syscall_test_runner.go
+++ b/test/syscalls/syscall_test_runner.go
@@ -450,17 +450,16 @@ func main() {
}
// Get subset of tests corresponding to shard.
- begin, end, err := testutil.TestBoundsForShard(len(testCases))
+ indices, err := testutil.TestIndicesForShard(len(testCases))
if err != nil {
fatalf("TestsForShard() failed: %v", err)
}
- testCases = testCases[begin:end]
// Run the tests.
var tests []testing.InternalTest
- for _, tc := range testCases {
+ for _, tci := range indices {
// Capture tc.
- tc := tc
+ tc := testCases[tci]
testName := fmt.Sprintf("%s_%s", tc.Suite, tc.Name)
tests = append(tests, testing.InternalTest{
Name: testName,
diff --git a/test/util/BUILD b/test/util/BUILD
index 1ac8b3fd6..1f22ebe29 100644
--- a/test/util/BUILD
+++ b/test/util/BUILD
@@ -1,4 +1,4 @@
-load("//tools:defs.bzl", "cc_library", "cc_test", "select_system")
+load("//tools:defs.bzl", "cc_library", "cc_test", "gtest", "select_system")
package(
default_visibility = ["//:sandbox"],
@@ -41,7 +41,7 @@ cc_library(
":save_util",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/strings:str_format",
- "@com_google_googletest//:gtest",
+ gtest,
],
)
@@ -55,7 +55,7 @@ cc_library(
":posix_error",
":test_util",
"@com_google_absl//absl/strings",
- "@com_google_googletest//:gtest",
+ gtest,
],
)
@@ -67,7 +67,7 @@ cc_test(
":proc_util",
":test_main",
":test_util",
- "@com_google_googletest//:gtest",
+ gtest,
],
)
@@ -87,7 +87,7 @@ cc_library(
":file_descriptor",
":posix_error",
"@com_google_absl//absl/strings",
- "@com_google_googletest//:gtest",
+ gtest,
],
)
@@ -101,7 +101,7 @@ cc_test(
":temp_path",
":test_main",
":test_util",
- "@com_google_googletest//:gtest",
+ gtest,
],
)
@@ -134,7 +134,7 @@ cc_library(
":cleanup",
":posix_error",
":test_util",
- "@com_google_googletest//:gtest",
+ gtest,
],
)
@@ -183,7 +183,7 @@ cc_library(
"@com_google_absl//absl/base:core_headers",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/types:variant",
- "@com_google_googletest//:gtest",
+ gtest,
],
)
@@ -194,7 +194,7 @@ cc_test(
deps = [
":posix_error",
":test_main",
- "@com_google_googletest//:gtest",
+ gtest,
],
)
@@ -218,7 +218,7 @@ cc_library(
":cleanup",
":posix_error",
":test_util",
- "@com_google_googletest//:gtest",
+ gtest,
],
)
@@ -233,7 +233,7 @@ cc_library(
":test_util",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
+ gtest,
],
)
@@ -259,7 +259,7 @@ cc_library(
"@com_google_absl//absl/strings",
"@com_google_absl//absl/strings:str_format",
"@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
+ gtest,
],
)
@@ -291,7 +291,7 @@ cc_library(
":posix_error",
":test_util",
"@com_google_absl//absl/time",
- "@com_google_googletest//:gtest",
+ gtest,
],
)
@@ -302,7 +302,7 @@ cc_test(
deps = [
":test_main",
":test_util",
- "@com_google_googletest//:gtest",
+ gtest,
],
)
@@ -322,7 +322,7 @@ cc_library(
":file_descriptor",
":posix_error",
":save_util",
- "@com_google_googletest//:gtest",
+ gtest,
],
)
diff --git a/test/util/platform_util.cc b/test/util/platform_util.cc
index 2724e63f3..c9200d381 100644
--- a/test/util/platform_util.cc
+++ b/test/util/platform_util.cc
@@ -20,10 +20,9 @@ namespace gvisor {
namespace testing {
PlatformSupport PlatformSupport32Bit() {
- if (GvisorPlatform() == Platform::kPtrace) {
+ if (GvisorPlatform() == Platform::kPtrace ||
+ GvisorPlatform() == Platform::kKVM) {
return PlatformSupport::NotSupported;
- } else if (GvisorPlatform() == Platform::kKVM) {
- return PlatformSupport::Segfault;
} else {
return PlatformSupport::Allowed;
}
diff --git a/test/util/signal_util.h b/test/util/signal_util.h
index bcf85c337..e7b66aa51 100644
--- a/test/util/signal_util.h
+++ b/test/util/signal_util.h
@@ -85,6 +85,20 @@ inline void FixupFault(ucontext_t* ctx) {
// The encoding is 0x48 0xab 0x00.
ctx->uc_mcontext.gregs[REG_RIP] += 3;
}
+#elif __aarch64__
+inline void Fault() {
+ // Zero and dereference x0.
+ asm("mov xzr, x0\r\n"
+ "str xzr, [x0]\r\n"
+ :
+ :
+ : "x0");
+}
+
+inline void FixupFault(ucontext_t* ctx) {
+ // Skip the bad instruction above.
+ ctx->uc_mcontext.pc += 4;
+}
#endif
} // namespace testing
diff --git a/test/util/test_util.cc b/test/util/test_util.cc
index 15cbc6da6..95e1e0c96 100644
--- a/test/util/test_util.cc
+++ b/test/util/test_util.cc
@@ -69,7 +69,6 @@ bool IsRunningWithHostinet() {
"xchg %%rdi, %%rbx\n" \
: "=a"(a), "=D"(b), "=c"(c), "=d"(d) \
: "a"(a_inp), "2"(c_inp))
-#endif // defined(__x86_64__)
CPUVendor GetCPUVendor() {
uint32_t eax, ebx, ecx, edx;
@@ -86,6 +85,7 @@ CPUVendor GetCPUVendor() {
}
return CPUVendor::kUnknownVendor;
}
+#endif // defined(__x86_64__)
bool operator==(const KernelVersion& first, const KernelVersion& second) {
return first.major == second.major && first.minor == second.minor &&
diff --git a/tools/build/defs.bzl b/tools/build/defs.bzl
index d0556abd1..1a1a0d825 100644
--- a/tools/build/defs.bzl
+++ b/tools/build/defs.bzl
@@ -8,6 +8,7 @@ load("@rules_pkg//:pkg.bzl", _pkg_deb = "pkg_deb", _pkg_tar = "pkg_tar")
load("@io_bazel_rules_docker//go:image.bzl", _go_image = "go_image")
load("@io_bazel_rules_docker//container:container.bzl", _container_image = "container_image")
load("@pydeps//:requirements.bzl", _py_requirement = "requirement")
+load("//tools/build:tags.bzl", _go_suffixes = "go_suffixes")
container_image = _container_image
cc_binary = _cc_binary
@@ -18,6 +19,8 @@ cc_test = _cc_test
cc_toolchain = "@bazel_tools//tools/cpp:current_cc_toolchain"
go_image = _go_image
go_embed_data = _go_embed_data
+go_suffixes = _go_suffixes
+gtest = "@com_google_googletest//:gtest"
loopback = "//tools/build:loopback"
proto_library = native.proto_library
pkg_deb = _pkg_deb
diff --git a/tools/build/tags.bzl b/tools/build/tags.bzl
new file mode 100644
index 000000000..e99c87f81
--- /dev/null
+++ b/tools/build/tags.bzl
@@ -0,0 +1,36 @@
+"""List of special Go suffixes."""
+
+go_suffixes = [
+ "_386",
+ "_386_unsafe",
+ "_amd64",
+ "_amd64_unsafe",
+ "_aarch64",
+ "_aarch64_unsafe",
+ "_arm",
+ "_arm_unsafe",
+ "_arm64",
+ "_arm64_unsafe",
+ "_mips",
+ "_mips_unsafe",
+ "_mipsle",
+ "_mipsle_unsafe",
+ "_mips64",
+ "_mips64_unsafe",
+ "_mips64le",
+ "_mips64le_unsafe",
+ "_ppc64",
+ "_ppc64_unsafe",
+ "_ppc64le",
+ "_ppc64le_unsafe",
+ "_riscv64",
+ "_riscv64_unsafe",
+ "_s390x",
+ "_s390x_unsafe",
+ "_sparc64",
+ "_sparc64_unsafe",
+ "_wasm",
+ "_wasm_unsafe",
+ "_linux",
+ "_linux_unsafe",
+]
diff --git a/tools/defs.bzl b/tools/defs.bzl
index 819f12b0d..5d5fa134a 100644
--- a/tools/defs.bzl
+++ b/tools/defs.bzl
@@ -7,7 +7,7 @@ change for Google-internal and bazel-compatible rules.
load("//tools/go_stateify:defs.bzl", "go_stateify")
load("//tools/go_marshal:defs.bzl", "go_marshal", "marshal_deps", "marshal_test_deps")
-load("//tools/build:defs.bzl", _cc_binary = "cc_binary", _cc_flags_supplier = "cc_flags_supplier", _cc_library = "cc_library", _cc_proto_library = "cc_proto_library", _cc_test = "cc_test", _cc_toolchain = "cc_toolchain", _container_image = "container_image", _default_installer = "default_installer", _default_net_util = "default_net_util", _go_binary = "go_binary", _go_embed_data = "go_embed_data", _go_image = "go_image", _go_library = "go_library", _go_proto_library = "go_proto_library", _go_test = "go_test", _go_tool_library = "go_tool_library", _loopback = "loopback", _pkg_deb = "pkg_deb", _pkg_tar = "pkg_tar", _proto_library = "proto_library", _py_binary = "py_binary", _py_library = "py_library", _py_requirement = "py_requirement", _py_test = "py_test", _select_arch = "select_arch", _select_system = "select_system")
+load("//tools/build:defs.bzl", "go_suffixes", _cc_binary = "cc_binary", _cc_flags_supplier = "cc_flags_supplier", _cc_library = "cc_library", _cc_proto_library = "cc_proto_library", _cc_test = "cc_test", _cc_toolchain = "cc_toolchain", _container_image = "container_image", _default_installer = "default_installer", _default_net_util = "default_net_util", _go_binary = "go_binary", _go_embed_data = "go_embed_data", _go_image = "go_image", _go_library = "go_library", _go_proto_library = "go_proto_library", _go_test = "go_test", _go_tool_library = "go_tool_library", _gtest = "gtest", _loopback = "loopback", _pkg_deb = "pkg_deb", _pkg_tar = "pkg_tar", _proto_library = "proto_library", _py_binary = "py_binary", _py_library = "py_library", _py_requirement = "py_requirement", _py_test = "py_test", _select_arch = "select_arch", _select_system = "select_system")
# Delegate directly.
cc_binary = _cc_binary
@@ -20,6 +20,7 @@ go_embed_data = _go_embed_data
go_image = _go_image
go_test = _go_test
go_tool_library = _go_tool_library
+gtest = _gtest
pkg_deb = _pkg_deb
pkg_tar = _pkg_tar
py_library = _py_library
@@ -44,6 +45,34 @@ def go_binary(name, **kwargs):
**kwargs
)
+def calculate_sets(srcs):
+ """Calculates special Go sets for templates.
+
+ Args:
+ srcs: the full set of Go sources.
+
+ Returns:
+ A dictionary of the form:
+
+ "": [src1.go, src2.go]
+ "suffix": [src3suffix.go, src4suffix.go]
+
+ Note that suffix will typically start with '_'.
+ """
+ result = dict()
+ for file in srcs:
+ if not file.endswith(".go"):
+ continue
+ target = ""
+ for suffix in go_suffixes:
+ if file.endswith(suffix + ".go"):
+ target = suffix
+ if not target in result:
+ result[target] = [file]
+ else:
+ result[target].append(file)
+ return result
+
def go_library(name, srcs, deps = [], imports = [], stateify = True, marshal = False, **kwargs):
"""Wraps the standard go_library and does stateification and marshalling.
@@ -69,39 +98,49 @@ def go_library(name, srcs, deps = [], imports = [], stateify = True, marshal = F
marshal: whether marshal is enabled (default: false).
**kwargs: standard go_library arguments.
"""
+ all_srcs = srcs
+ all_deps = deps
if stateify:
# Only do stateification for non-state packages without manual autogen.
- go_stateify(
- name = name + "_state_autogen",
- srcs = [src for src in srcs if src.endswith(".go")],
- imports = imports,
- package = name,
- arch = select_arch(),
- out = name + "_state_autogen.go",
- )
- all_srcs = srcs + [name + "_state_autogen.go"]
- if "//pkg/state" not in deps:
- all_deps = deps + ["//pkg/state"]
- else:
- all_deps = deps
- else:
- all_deps = deps
- all_srcs = srcs
+ # First, we need to segregate the input files via the special suffixes,
+ # and calculate the final output set.
+ state_sets = calculate_sets(srcs)
+ for (suffix, srcs) in state_sets.items():
+ go_stateify(
+ name = name + suffix + "_state_autogen",
+ srcs = srcs,
+ imports = imports,
+ package = name,
+ out = name + suffix + "_state_autogen.go",
+ )
+ all_srcs = all_srcs + [
+ name + suffix + "_state_autogen.go"
+ for suffix in state_sets.keys()
+ ]
+ if "//pkg/state" not in all_deps:
+ all_deps = all_deps + ["//pkg/state"]
+
if marshal:
- go_marshal(
- name = name + "_abi_autogen",
- srcs = [src for src in srcs if src.endswith(".go")],
- debug = False,
- imports = imports,
- package = name,
- )
+ # See above.
+ marshal_sets = calculate_sets(srcs)
+ for (suffix, srcs) in marshal_sets.items():
+ go_marshal(
+ name = name + suffix + "_abi_autogen",
+ srcs = srcs,
+ debug = False,
+ imports = imports,
+ package = name,
+ )
extra_deps = [
dep
for dep in marshal_deps
if not dep in all_deps
]
all_deps = all_deps + extra_deps
- all_srcs = srcs + [name + "_abi_autogen_unsafe.go"]
+ all_srcs = all_srcs + [
+ name + suffix + "_abi_autogen_unsafe.go"
+ for suffix in marshal_sets.keys()
+ ]
_go_library(
name = name,
@@ -114,13 +153,16 @@ def go_library(name, srcs, deps = [], imports = [], stateify = True, marshal = F
# Ignore importpath for go_test.
kwargs.pop("importpath", None)
- _go_test(
- name = name + "_abi_autogen_test",
- srcs = [name + "_abi_autogen_test.go"],
- library = ":" + name,
- deps = marshal_test_deps,
- **kwargs
- )
+ # See above.
+ marshal_sets = calculate_sets(srcs)
+ for (suffix, srcs) in marshal_sets.items():
+ _go_test(
+ name = name + suffix + "_abi_autogen_test",
+ srcs = [name + suffix + "_abi_autogen_test.go"],
+ library = ":" + name + suffix,
+ deps = marshal_test_deps,
+ **kwargs
+ )
def proto_library(name, srcs, **kwargs):
"""Wraps the standard proto_library.
diff --git a/tools/go_marshal/gomarshal/BUILD b/tools/go_marshal/gomarshal/BUILD
index c92b59dd6..b5d5a4487 100644
--- a/tools/go_marshal/gomarshal/BUILD
+++ b/tools/go_marshal/gomarshal/BUILD
@@ -14,4 +14,5 @@ go_library(
visibility = [
"//:sandbox",
],
+ deps = ["//tools/tags"],
)
diff --git a/tools/go_marshal/gomarshal/generator.go b/tools/go_marshal/gomarshal/generator.go
index af90bdecb..0b3f600fe 100644
--- a/tools/go_marshal/gomarshal/generator.go
+++ b/tools/go_marshal/gomarshal/generator.go
@@ -23,6 +23,9 @@ import (
"go/token"
"os"
"sort"
+ "strings"
+
+ "gvisor.dev/gvisor/tools/tags"
)
const (
@@ -104,6 +107,14 @@ func NewGenerator(srcs []string, out, outTest, pkg string, imports []string) (*G
func (g *Generator) writeHeader() error {
var b sourceBuffer
b.emit("// Automatically generated marshal implementation. See tools/go_marshal.\n\n")
+
+ // Emit build tags.
+ if t := tags.Aggregate(g.inputs); len(t) > 0 {
+ b.emit(strings.Join(t.Lines(), "\n"))
+ b.emit("\n")
+ }
+
+ // Package header.
b.emit("package %s\n\n", g.pkg)
if err := b.write(g.output); err != nil {
return err
diff --git a/tools/go_stateify/BUILD b/tools/go_stateify/BUILD
index a133d6f8b..6036faf7b 100644
--- a/tools/go_stateify/BUILD
+++ b/tools/go_stateify/BUILD
@@ -6,4 +6,5 @@ go_binary(
name = "stateify",
srcs = ["main.go"],
visibility = ["//visibility:public"],
+ deps = ["//tools/tags"],
)
diff --git a/tools/go_stateify/defs.bzl b/tools/go_stateify/defs.bzl
index 0f261d89f..bdb966362 100644
--- a/tools/go_stateify/defs.bzl
+++ b/tools/go_stateify/defs.bzl
@@ -7,7 +7,6 @@ def _go_stateify_impl(ctx):
# Run the stateify command.
args = ["-output=%s" % output.path]
args.append("-pkg=%s" % ctx.attr.package)
- args.append("-arch=%s" % ctx.attr.arch)
if ctx.attr._statepkg:
args.append("-statepkg=%s" % ctx.attr._statepkg)
if ctx.attr.imports:
@@ -47,15 +46,8 @@ for statified types.
doc = "The package name for the input sources.",
mandatory = True,
),
- "arch": attr.string(
- doc = "Target platform.",
- mandatory = True,
- ),
"out": attr.output(
- doc = """
-The name of the generated file output. This must not conflict with any other
-files and must be added to the srcs of the relevant go_library.
-""",
+ doc = "Name of the generator output file.",
mandatory = True,
),
"_tool": attr.label(
diff --git a/tools/go_stateify/main.go b/tools/go_stateify/main.go
index 7d5d291e6..aa9d4543e 100644
--- a/tools/go_stateify/main.go
+++ b/tools/go_stateify/main.go
@@ -22,12 +22,12 @@ import (
"go/ast"
"go/parser"
"go/token"
- "io/ioutil"
"os"
- "path/filepath"
"reflect"
"strings"
"sync"
+
+ "gvisor.dev/gvisor/tools/tags"
)
var (
@@ -35,113 +35,8 @@ var (
imports = flag.String("imports", "", "extra imports for the output file")
output = flag.String("output", "", "output file")
statePkg = flag.String("statepkg", "", "state import package; defaults to empty")
- arch = flag.String("arch", "", "specify the target platform")
)
-// The known architectures.
-var okgoarch = []string{
- "386",
- "amd64",
- "arm",
- "arm64",
- "mips",
- "mipsle",
- "mips64",
- "mips64le",
- "ppc64",
- "ppc64le",
- "riscv64",
- "s390x",
- "sparc64",
- "wasm",
-}
-
-// readfile returns the content of the named file.
-func readfile(file string) string {
- data, err := ioutil.ReadFile(file)
- if err != nil {
- panic(fmt.Sprintf("readfile err: %v", err))
- }
- return string(data)
-}
-
-// matchfield reports whether the field (x,y,z) matches this build.
-// all the elements in the field must be satisfied.
-func matchfield(f string, goarch string) bool {
- for _, tag := range strings.Split(f, ",") {
- if !matchtag(tag, goarch) {
- return false
- }
- }
- return true
-}
-
-// matchtag reports whether the tag (x or !x) matches this build.
-func matchtag(tag string, goarch string) bool {
- if tag == "" {
- return false
- }
- if tag[0] == '!' {
- if len(tag) == 1 || tag[1] == '!' {
- return false
- }
- return !matchtag(tag[1:], goarch)
- }
- return tag == goarch
-}
-
-// canBuild reports whether we can build this file for target platform by
-// checking file name and build tags. The code is derived from the Go source
-// cmd.dist.build.shouldbuild.
-func canBuild(file, goTargetArch string) bool {
- name := filepath.Base(file)
- excluded := func(list []string, ok string) bool {
- for _, x := range list {
- if x == ok || (ok == "android" && x == "linux") || (ok == "illumos" && x == "solaris") {
- continue
- }
- i := strings.Index(name, x)
- if i <= 0 || name[i-1] != '_' {
- continue
- }
- i += len(x)
- if i == len(name) || name[i] == '.' || name[i] == '_' {
- return true
- }
- }
- return false
- }
- if excluded(okgoarch, goTargetArch) {
- return false
- }
-
- // Check file contents for // +build lines.
- for _, p := range strings.Split(readfile(file), "\n") {
- p = strings.TrimSpace(p)
- if p == "" {
- continue
- }
- if !strings.HasPrefix(p, "//") {
- break
- }
- if !strings.Contains(p, "+build") {
- continue
- }
- fields := strings.Fields(p[2:])
- if len(fields) < 1 || fields[0] != "+build" {
- continue
- }
- for _, p := range fields[1:] {
- if matchfield(p, goTargetArch) {
- goto fieldmatch
- }
- }
- return false
- fieldmatch:
- }
- return true
-}
-
// resolveTypeName returns a qualified type name.
func resolveTypeName(name string, typ ast.Expr) (field string, qualified string) {
for done := false; !done; {
@@ -329,8 +224,15 @@ func main() {
fmt.Fprintf(outputFile, " m.Save(\"%s\", &x.%s)\n", name, name)
}
- // Emit the package name.
+ // Automated warning.
fmt.Fprint(outputFile, "// automatically generated by stateify.\n\n")
+
+ // Emit build tags.
+ if t := tags.Aggregate(flag.Args()); len(t) > 0 {
+ fmt.Fprintf(outputFile, "%s\n\n", strings.Join(t.Lines(), "\n"))
+ }
+
+ // Emit the package name.
fmt.Fprintf(outputFile, "package %s\n\n", *pkg)
// Emit the imports lazily.
@@ -364,10 +266,6 @@ func main() {
os.Exit(1)
}
- if !canBuild(filename, *arch) {
- continue
- }
-
files = append(files, f)
}
diff --git a/tools/images/BUILD b/tools/images/BUILD
index f1699b184..fe11f08a3 100644
--- a/tools/images/BUILD
+++ b/tools/images/BUILD
@@ -1,4 +1,4 @@
-load("//tools:defs.bzl", "cc_binary")
+load("//tools:defs.bzl", "cc_binary", "gtest")
load("//tools/images:defs.bzl", "vm_image", "vm_test")
package(
@@ -32,8 +32,8 @@ cc_binary(
srcs = ["test.cc"],
linkstatic = 1,
deps = [
+ gtest,
"//test/util:test_main",
- "@com_google_googletest//:gtest",
],
)
diff --git a/tools/tags/BUILD b/tools/tags/BUILD
new file mode 100644
index 000000000..1c02e2c89
--- /dev/null
+++ b/tools/tags/BUILD
@@ -0,0 +1,11 @@
+load("//tools:defs.bzl", "go_library")
+
+package(licenses = ["notice"])
+
+go_library(
+ name = "tags",
+ srcs = ["tags.go"],
+ marshal = False,
+ stateify = False,
+ visibility = ["//tools:__subpackages__"],
+)
diff --git a/tools/tags/tags.go b/tools/tags/tags.go
new file mode 100644
index 000000000..f35904e0a
--- /dev/null
+++ b/tools/tags/tags.go
@@ -0,0 +1,89 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package tags is a utility for parsing build tags.
+package tags
+
+import (
+ "fmt"
+ "io/ioutil"
+ "strings"
+)
+
+// OrSet is a set of tags on a single line.
+//
+// Note that tags may include ",", and we don't distinguish this case in the
+// logic below. Ideally, this constraints can be split into separate top-level
+// build tags in order to resolve any issues.
+type OrSet []string
+
+// Line returns the line for this or.
+func (or OrSet) Line() string {
+ return fmt.Sprintf("// +build %s", strings.Join([]string(or), " "))
+}
+
+// AndSet is the set of all OrSets.
+type AndSet []OrSet
+
+// Lines returns the lines to be printed.
+func (and AndSet) Lines() (ls []string) {
+ for _, or := range and {
+ ls = append(ls, or.Line())
+ }
+ return
+}
+
+// Join joins this AndSet with another.
+func (and AndSet) Join(other AndSet) AndSet {
+ return append(and, other...)
+}
+
+// Tags returns the unique set of +build tags.
+//
+// Derived form the runtime's canBuild.
+func Tags(file string) (tags AndSet) {
+ data, err := ioutil.ReadFile(file)
+ if err != nil {
+ return nil
+ }
+ // Check file contents for // +build lines.
+ for _, p := range strings.Split(string(data), "\n") {
+ p = strings.TrimSpace(p)
+ if p == "" {
+ continue
+ }
+ if !strings.HasPrefix(p, "//") {
+ break
+ }
+ if !strings.Contains(p, "+build") {
+ continue
+ }
+ fields := strings.Fields(p[2:])
+ if len(fields) < 1 || fields[0] != "+build" {
+ continue
+ }
+ tags = append(tags, OrSet(fields[1:]))
+ }
+ return tags
+}
+
+// Aggregate aggregates all tags from a set of files.
+//
+// Note that these may be in conflict, in which case the build will fail.
+func Aggregate(files []string) (tags AndSet) {
+ for _, file := range files {
+ tags = tags.Join(Tags(file))
+ }
+ return tags
+}