summaryrefslogtreecommitdiffhomepage
path: root/test/syscalls
diff options
context:
space:
mode:
authorBrian Geffon <bgeffon@google.com>2018-12-10 14:41:40 -0800
committerShentubot <shentubot@google.com>2018-12-10 14:42:34 -0800
commitd3bc79bc8438206ac6a14fde4eaa288fc07eee82 (patch)
treee820398591bfd1503456e877fa0c2bdd0f994959 /test/syscalls
parent833edbd10b49db1f934dcb2495dcb41c1310eea4 (diff)
Open source system call tests.
PiperOrigin-RevId: 224886231 Change-Id: I0fccb4d994601739d8b16b1d4e6b31f40297fb22
Diffstat (limited to 'test/syscalls')
-rw-r--r--test/syscalls/BUILD522
-rw-r--r--test/syscalls/README.md110
-rw-r--r--test/syscalls/build_defs.bzl54
-rw-r--r--test/syscalls/gtest/BUILD12
-rw-r--r--test/syscalls/gtest/gtest.go93
-rw-r--r--test/syscalls/linux/32bit.cc226
-rw-r--r--test/syscalls/linux/BUILD2951
-rw-r--r--test/syscalls/linux/accept_bind.cc600
-rw-r--r--test/syscalls/linux/accept_bind_stream.cc93
-rw-r--r--test/syscalls/linux/access.cc170
-rw-r--r--test/syscalls/linux/affinity.cc241
-rw-r--r--test/syscalls/linux/aio.cc433
-rw-r--r--test/syscalls/linux/alarm.cc193
-rw-r--r--test/syscalls/linux/arch_prctl.cc48
-rw-r--r--test/syscalls/linux/bad.cc39
-rw-r--r--test/syscalls/linux/base_poll_test.cc65
-rw-r--r--test/syscalls/linux/base_poll_test.h101
-rw-r--r--test/syscalls/linux/bind.cc146
-rw-r--r--test/syscalls/linux/brk.cc31
-rw-r--r--test/syscalls/linux/chdir.cc69
-rw-r--r--test/syscalls/linux/chmod.cc262
-rw-r--r--test/syscalls/linux/chown.cc200
-rw-r--r--test/syscalls/linux/chroot.cc364
-rw-r--r--test/syscalls/linux/clock_getres.cc37
-rw-r--r--test/syscalls/linux/clock_gettime.cc156
-rw-r--r--test/syscalls/linux/clock_nanosleep.cc153
-rw-r--r--test/syscalls/linux/concurrency.cc124
-rw-r--r--test/syscalls/linux/creat.cc57
-rw-r--r--test/syscalls/linux/dev.cc149
-rw-r--r--test/syscalls/linux/dup.cc139
-rw-r--r--test/syscalls/linux/epoll.cc468
-rw-r--r--test/syscalls/linux/eventfd.cc189
-rw-r--r--test/syscalls/linux/exceptions.cc146
-rw-r--r--test/syscalls/linux/exec.cc625
-rw-r--r--test/syscalls/linux/exec.h34
-rw-r--r--test/syscalls/linux/exec_assert_closed_workload.cc45
-rw-r--r--test/syscalls/linux/exec_basic_workload.cc31
-rw-r--r--test/syscalls/linux/exec_binary.cc1367
-rw-r--r--test/syscalls/linux/exec_proc_exe_workload.cc35
-rw-r--r--test/syscalls/linux/exec_state_workload.cc202
-rw-r--r--test/syscalls/linux/exit.cc77
-rwxr-xr-xtest/syscalls/linux/exit_script.sh22
-rw-r--r--test/syscalls/linux/fadvise64.cc72
-rw-r--r--test/syscalls/linux/fallocate.cc57
-rw-r--r--test/syscalls/linux/fault.cc71
-rw-r--r--test/syscalls/linux/fchdir.cc77
-rw-r--r--test/syscalls/linux/fcntl.cc978
-rw-r--r--test/syscalls/linux/file_base.h206
-rw-r--r--test/syscalls/linux/flock.cc588
-rw-r--r--test/syscalls/linux/fork.cc413
-rw-r--r--test/syscalls/linux/fpsig_fork.cc105
-rw-r--r--test/syscalls/linux/fpsig_nested.cc134
-rw-r--r--test/syscalls/linux/fsync.cc55
-rw-r--r--test/syscalls/linux/futex.cc595
-rw-r--r--test/syscalls/linux/getcpu.cc40
-rw-r--r--test/syscalls/linux/getdents.cc485
-rw-r--r--test/syscalls/linux/getrandom.cc61
-rw-r--r--test/syscalls/linux/getrusage.cc177
-rw-r--r--test/syscalls/linux/inotify.cc1489
-rw-r--r--test/syscalls/linux/ioctl.cc375
-rw-r--r--test/syscalls/linux/ip_socket_test_util.cc78
-rw-r--r--test/syscalls/linux/ip_socket_test_util.h57
-rw-r--r--test/syscalls/linux/itimer.cc342
-rw-r--r--test/syscalls/linux/kill.cc380
-rw-r--r--test/syscalls/linux/link.cc291
-rw-r--r--test/syscalls/linux/lseek.cc202
-rw-r--r--test/syscalls/linux/madvise.cc142
-rw-r--r--test/syscalls/linux/memory_accounting.cc99
-rw-r--r--test/syscalls/linux/mempolicy.cc258
-rw-r--r--test/syscalls/linux/mincore.cc96
-rw-r--r--test/syscalls/linux/mkdir.cc96
-rw-r--r--test/syscalls/linux/mknod.cc173
-rw-r--r--test/syscalls/linux/mmap.cc1714
-rw-r--r--test/syscalls/linux/mount.cc302
-rw-r--r--test/syscalls/linux/mremap.cc514
-rw-r--r--test/syscalls/linux/msync.cc145
-rw-r--r--test/syscalls/linux/munmap.cc53
-rw-r--r--test/syscalls/linux/open.cc340
-rw-r--r--test/syscalls/linux/open_create.cc130
-rw-r--r--test/syscalls/linux/partial_bad_buffer.cc305
-rw-r--r--test/syscalls/linux/pause.cc88
-rw-r--r--test/syscalls/linux/pipe.cc480
-rw-r--r--test/syscalls/linux/poll.cc279
-rw-r--r--test/syscalls/linux/ppoll.cc155
-rw-r--r--test/syscalls/linux/prctl.cc171
-rw-r--r--test/syscalls/linux/prctl_setuid.cc262
-rw-r--r--test/syscalls/linux/pread64.cc152
-rw-r--r--test/syscalls/linux/preadv.cc94
-rw-r--r--test/syscalls/linux/preadv2.cc217
-rw-r--r--test/syscalls/linux/priority.cc215
-rw-r--r--test/syscalls/linux/priority_execve.cc42
-rw-r--r--test/syscalls/linux/proc.cc1830
-rw-r--r--test/syscalls/linux/proc_net.cc59
-rw-r--r--test/syscalls/linux/pselect.cc190
-rw-r--r--test/syscalls/linux/ptrace.cc948
-rw-r--r--test/syscalls/linux/pty.cc1230
-rw-r--r--test/syscalls/linux/pwrite64.cc79
-rw-r--r--test/syscalls/linux/read.cc117
-rw-r--r--test/syscalls/linux/readv.cc293
-rw-r--r--test/syscalls/linux/readv_common.cc180
-rw-r--r--test/syscalls/linux/readv_common.h61
-rw-r--r--test/syscalls/linux/readv_socket.cc182
-rw-r--r--test/syscalls/linux/rename.cc373
-rw-r--r--test/syscalls/linux/rlimits.cc61
-rw-r--r--test/syscalls/linux/rtsignal.cc172
-rw-r--r--test/syscalls/linux/sched.cc71
-rw-r--r--test/syscalls/linux/sched_yield.cc33
-rw-r--r--test/syscalls/linux/seccomp.cc374
-rw-r--r--test/syscalls/linux/select.cc128
-rw-r--r--test/syscalls/linux/semaphore.cc438
-rw-r--r--test/syscalls/linux/sendfile.cc409
-rw-r--r--test/syscalls/linux/sendfile_socket.cc156
-rw-r--r--test/syscalls/linux/shm.cc445
-rw-r--r--test/syscalls/linux/sigaction.cc70
-rw-r--r--test/syscalls/linux/sigaltstack.cc274
-rw-r--r--test/syscalls/linux/sigaltstack_check.cc33
-rw-r--r--test/syscalls/linux/sigiret.cc137
-rw-r--r--test/syscalls/linux/sigprocmask.cc272
-rw-r--r--test/syscalls/linux/sigstop.cc150
-rw-r--r--test/syscalls/linux/sigtimedwait.cc248
-rw-r--r--test/syscalls/linux/socket_abstract.cc43
-rw-r--r--test/syscalls/linux/socket_filesystem.cc43
-rw-r--r--test/syscalls/linux/socket_generic.cc403
-rw-r--r--test/syscalls/linux/socket_generic.h30
-rw-r--r--test/syscalls/linux/socket_inet_loopback.cc812
-rw-r--r--test/syscalls/linux/socket_ip_tcp_generic.cc392
-rw-r--r--test/syscalls/linux/socket_ip_tcp_generic.h29
-rw-r--r--test/syscalls/linux/socket_ip_tcp_generic_loopback.cc47
-rw-r--r--test/syscalls/linux/socket_ip_tcp_loopback.cc43
-rw-r--r--test/syscalls/linux/socket_ip_tcp_loopback_blocking.cc44
-rw-r--r--test/syscalls/linux/socket_ip_tcp_loopback_nonblock.cc46
-rw-r--r--test/syscalls/linux/socket_ip_tcp_udp_generic.cc78
-rw-r--r--test/syscalls/linux/socket_ip_udp_loopback.cc48
-rw-r--r--test/syscalls/linux/socket_ip_udp_loopback_blocking.cc40
-rw-r--r--test/syscalls/linux/socket_ip_udp_loopback_nonblock.cc42
-rw-r--r--test/syscalls/linux/socket_netdevice.cc182
-rw-r--r--test/syscalls/linux/socket_netlink_route.cc314
-rw-r--r--test/syscalls/linux/socket_netlink_util.cc100
-rw-r--r--test/syscalls/linux/socket_netlink_util.h42
-rw-r--r--test/syscalls/linux/socket_non_blocking.cc63
-rw-r--r--test/syscalls/linux/socket_non_blocking.h29
-rw-r--r--test/syscalls/linux/socket_non_stream.cc174
-rw-r--r--test/syscalls/linux/socket_non_stream.h29
-rw-r--r--test/syscalls/linux/socket_non_stream_blocking.cc51
-rw-r--r--test/syscalls/linux/socket_non_stream_blocking.h30
-rw-r--r--test/syscalls/linux/socket_stream.cc99
-rw-r--r--test/syscalls/linux/socket_stream.h30
-rw-r--r--test/syscalls/linux/socket_stream_blocking.cc131
-rw-r--r--test/syscalls/linux/socket_stream_blocking.h30
-rw-r--r--test/syscalls/linux/socket_stream_nonblock.cc50
-rw-r--r--test/syscalls/linux/socket_stream_nonblock.h30
-rw-r--r--test/syscalls/linux/socket_test_util.cc660
-rw-r--r--test/syscalls/linux/socket_test_util.h449
-rw-r--r--test/syscalls/linux/socket_unix.cc1181
-rw-r--r--test/syscalls/linux/socket_unix.h29
-rw-r--r--test/syscalls/linux/socket_unix_abstract.cc38
-rw-r--r--test/syscalls/linux/socket_unix_abstract_nonblock.cc38
-rw-r--r--test/syscalls/linux/socket_unix_dgram.cc45
-rw-r--r--test/syscalls/linux/socket_unix_dgram.h29
-rw-r--r--test/syscalls/linux/socket_unix_dgram_local.cc59
-rw-r--r--test/syscalls/linux/socket_unix_dgram_non_blocking.cc68
-rw-r--r--test/syscalls/linux/socket_unix_domain.cc38
-rw-r--r--test/syscalls/linux/socket_unix_filesystem.cc38
-rw-r--r--test/syscalls/linux/socket_unix_filesystem_nonblock.cc38
-rw-r--r--test/syscalls/linux/socket_unix_non_stream.cc229
-rw-r--r--test/syscalls/linux/socket_unix_non_stream.h30
-rw-r--r--test/syscalls/linux/socket_unix_non_stream_blocking_local.cc47
-rw-r--r--test/syscalls/linux/socket_unix_pair.cc38
-rw-r--r--test/syscalls/linux/socket_unix_pair_nonblock.cc38
-rw-r--r--test/syscalls/linux/socket_unix_seqpacket.cc49
-rw-r--r--test/syscalls/linux/socket_unix_seqpacket.h30
-rw-r--r--test/syscalls/linux/socket_unix_seqpacket_local.cc59
-rw-r--r--test/syscalls/linux/socket_unix_stream.cc69
-rw-r--r--test/syscalls/linux/socket_unix_stream_blocking_local.cc47
-rw-r--r--test/syscalls/linux/socket_unix_stream_local.cc49
-rw-r--r--test/syscalls/linux/socket_unix_stream_nonblock_local.cc49
-rw-r--r--test/syscalls/linux/socket_unix_unbound_abstract.cc116
-rw-r--r--test/syscalls/linux/socket_unix_unbound_dgram.cc162
-rw-r--r--test/syscalls/linux/socket_unix_unbound_filesystem.cc84
-rw-r--r--test/syscalls/linux/socket_unix_unbound_seqpacket.cc91
-rw-r--r--test/syscalls/linux/socket_unix_unbound_stream.cc738
-rw-r--r--test/syscalls/linux/stat.cc410
-rw-r--r--test/syscalls/linux/stat_times.cc220
-rw-r--r--test/syscalls/linux/statfs.cc81
-rw-r--r--test/syscalls/linux/sticky.cc116
-rw-r--r--test/syscalls/linux/symlink.cc288
-rw-r--r--test/syscalls/linux/sync.cc60
-rw-r--r--test/syscalls/linux/sync_file_range.cc111
-rw-r--r--test/syscalls/linux/sysinfo.cc86
-rw-r--r--test/syscalls/linux/syslog.cc51
-rw-r--r--test/syscalls/linux/sysret.cc113
-rw-r--r--test/syscalls/linux/tcp_socket.cc759
-rw-r--r--test/syscalls/linux/temp_umask.h39
-rw-r--r--test/syscalls/linux/tgkill.cc48
-rw-r--r--test/syscalls/linux/time.cc103
-rw-r--r--test/syscalls/linux/timerfd.cc238
-rw-r--r--test/syscalls/linux/timers.cc642
-rw-r--r--test/syscalls/linux/tkill.cc75
-rw-r--r--test/syscalls/linux/truncate.cc217
-rw-r--r--test/syscalls/linux/udp_bind.cc316
-rw-r--r--test/syscalls/linux/udp_socket.cc941
-rw-r--r--test/syscalls/linux/uidgid.cc277
-rw-r--r--test/syscalls/linux/uname.cc99
-rw-r--r--test/syscalls/linux/unix_domain_socket_test_util.cc346
-rw-r--r--test/syscalls/linux/unix_domain_socket_test_util.h161
-rw-r--r--test/syscalls/linux/unlink.cc211
-rw-r--r--test/syscalls/linux/unshare.cc50
-rw-r--r--test/syscalls/linux/utimes.cc330
-rw-r--r--test/syscalls/linux/vdso.cc48
-rw-r--r--test/syscalls/linux/vdso_clock_gettime.cc104
-rw-r--r--test/syscalls/linux/vfork.cc193
-rw-r--r--test/syscalls/linux/vsyscall.cc44
-rw-r--r--test/syscalls/linux/wait.cc748
-rw-r--r--test/syscalls/linux/write.cc134
-rw-r--r--test/syscalls/syscall_test.go245
-rwxr-xr-xtest/syscalls/syscall_test_runner.sh25
216 files changed, 50030 insertions, 0 deletions
diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD
new file mode 100644
index 000000000..318d80393
--- /dev/null
+++ b/test/syscalls/BUILD
@@ -0,0 +1,522 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_test")
+
+package(licenses = ["notice"]) # Apache 2.0
+
+load("//test/syscalls:build_defs.bzl", "syscall_test")
+
+syscall_test(test = "//test/syscalls/linux:32bit_test")
+
+syscall_test(test = "//test/syscalls/linux:accept_bind_stream_test")
+
+syscall_test(
+ size = "enormous",
+ test = "//test/syscalls/linux:accept_bind_test",
+)
+
+syscall_test(test = "//test/syscalls/linux:access_test")
+
+syscall_test(test = "//test/syscalls/linux:affinity_test")
+
+syscall_test(test = "//test/syscalls/linux:aio_test")
+
+syscall_test(
+ size = "medium",
+ test = "//test/syscalls/linux:alarm_test",
+)
+
+syscall_test(test = "//test/syscalls/linux:arch_prctl_test")
+
+syscall_test(test = "//test/syscalls/linux:bad_test")
+
+syscall_test(
+ size = "large",
+ test = "//test/syscalls/linux:bind_test",
+)
+
+syscall_test(test = "//test/syscalls/linux:brk_test")
+
+syscall_test(test = "//test/syscalls/linux:chdir_test")
+
+syscall_test(test = "//test/syscalls/linux:chmod_test")
+
+syscall_test(
+ size = "medium",
+ test = "//test/syscalls/linux:chown_test",
+)
+
+syscall_test(test = "//test/syscalls/linux:chroot_test")
+
+syscall_test(test = "//test/syscalls/linux:clock_getres_test")
+
+syscall_test(test = "//test/syscalls/linux:clock_gettime_test")
+
+syscall_test(test = "//test/syscalls/linux:clock_nanosleep_test")
+
+syscall_test(test = "//test/syscalls/linux:concurrency_test")
+
+syscall_test(test = "//test/syscalls/linux:creat_test")
+
+syscall_test(test = "//test/syscalls/linux:dev_test")
+
+syscall_test(test = "//test/syscalls/linux:dup_test")
+
+syscall_test(test = "//test/syscalls/linux:epoll_test")
+
+syscall_test(test = "//test/syscalls/linux:eventfd_test")
+
+syscall_test(test = "//test/syscalls/linux:exceptions_test")
+
+syscall_test(
+ size = "medium",
+ test = "//test/syscalls/linux:exec_test",
+)
+
+syscall_test(
+ size = "medium",
+ test = "//test/syscalls/linux:exec_binary_test",
+)
+
+syscall_test(test = "//test/syscalls/linux:exit_test")
+
+syscall_test(test = "//test/syscalls/linux:fadvise64_test")
+
+syscall_test(test = "//test/syscalls/linux:fallocate_test")
+
+syscall_test(test = "//test/syscalls/linux:fault_test")
+
+syscall_test(test = "//test/syscalls/linux:fchdir_test")
+
+syscall_test(
+ size = "medium",
+ test = "//test/syscalls/linux:fcntl_test",
+)
+
+syscall_test(
+ size = "medium",
+ test = "//test/syscalls/linux:flock_test",
+)
+
+syscall_test(test = "//test/syscalls/linux:fork_test")
+
+syscall_test(test = "//test/syscalls/linux:fpsig_fork_test")
+
+syscall_test(test = "//test/syscalls/linux:fpsig_nested_test")
+
+syscall_test(test = "//test/syscalls/linux:fsync_test")
+
+syscall_test(
+ size = "medium",
+ test = "//test/syscalls/linux:futex_test",
+)
+
+syscall_test(test = "//test/syscalls/linux:getcpu_host_test")
+
+syscall_test(test = "//test/syscalls/linux:getcpu_test")
+
+syscall_test(test = "//test/syscalls/linux:getdents_test")
+
+syscall_test(test = "//test/syscalls/linux:getrandom_test")
+
+syscall_test(test = "//test/syscalls/linux:getrusage_test")
+
+syscall_test(
+ size = "medium",
+ test = "//test/syscalls/linux:inotify_test",
+)
+
+syscall_test(
+ size = "medium",
+ test = "//test/syscalls/linux:ioctl_test",
+)
+
+syscall_test(
+ size = "medium",
+ test = "//test/syscalls/linux:itimer_test",
+)
+
+syscall_test(test = "//test/syscalls/linux:kill_test")
+
+syscall_test(test = "//test/syscalls/linux:link_test")
+
+syscall_test(test = "//test/syscalls/linux:lseek_test")
+
+syscall_test(test = "//test/syscalls/linux:madvise_test")
+
+syscall_test(test = "//test/syscalls/linux:memory_accounting_test")
+
+syscall_test(test = "//test/syscalls/linux:mempolicy_test")
+
+syscall_test(test = "//test/syscalls/linux:mincore_test")
+
+syscall_test(test = "//test/syscalls/linux:mkdir_test")
+
+syscall_test(test = "//test/syscalls/linux:mknod_test")
+
+syscall_test(
+ size = "medium",
+ test = "//test/syscalls/linux:mmap_test",
+)
+
+syscall_test(test = "//test/syscalls/linux:mount_test")
+
+syscall_test(
+ size = "medium",
+ test = "//test/syscalls/linux:mremap_test",
+)
+
+syscall_test(
+ size = "medium",
+ test = "//test/syscalls/linux:msync_test",
+)
+
+syscall_test(test = "//test/syscalls/linux:munmap_test")
+
+syscall_test(test = "//test/syscalls/linux:open_create_test")
+
+syscall_test(test = "//test/syscalls/linux:open_test")
+
+syscall_test(test = "//test/syscalls/linux:partial_bad_buffer_test")
+
+syscall_test(test = "//test/syscalls/linux:pause_test")
+
+syscall_test(test = "//test/syscalls/linux:pipe_test")
+
+syscall_test(test = "//test/syscalls/linux:poll_test")
+
+syscall_test(
+ size = "medium",
+ test = "//test/syscalls/linux:ppoll_test",
+)
+
+syscall_test(test = "//test/syscalls/linux:prctl_setuid_test")
+
+syscall_test(test = "//test/syscalls/linux:prctl_test")
+
+syscall_test(test = "//test/syscalls/linux:pread64_test")
+
+syscall_test(test = "//test/syscalls/linux:preadv_test")
+
+syscall_test(test = "//test/syscalls/linux:preadv2_test")
+
+syscall_test(test = "//test/syscalls/linux:priority_test")
+
+syscall_test(
+ size = "medium",
+ test = "//test/syscalls/linux:proc_test",
+)
+
+syscall_test(
+ size = "medium",
+ test = "//test/syscalls/linux:pselect_test",
+)
+
+syscall_test(test = "//test/syscalls/linux:ptrace_test")
+
+syscall_test(
+ size = "medium",
+ test = "//test/syscalls/linux:pty_test",
+)
+
+syscall_test(test = "//test/syscalls/linux:pwrite64_test")
+
+syscall_test(test = "//test/syscalls/linux:read_test")
+
+syscall_test(
+ size = "medium",
+ test = "//test/syscalls/linux:readv_socket_test",
+)
+
+syscall_test(
+ size = "medium",
+ test = "//test/syscalls/linux:readv_test",
+)
+
+syscall_test(
+ size = "medium",
+ test = "//test/syscalls/linux:rename_test",
+)
+
+syscall_test(test = "//test/syscalls/linux:rlimits_test")
+
+syscall_test(test = "//test/syscalls/linux:rtsignal_test")
+
+syscall_test(test = "//test/syscalls/linux:sched_test")
+
+syscall_test(test = "//test/syscalls/linux:sched_yield_test")
+
+syscall_test(test = "//test/syscalls/linux:seccomp_test")
+
+syscall_test(test = "//test/syscalls/linux:select_test")
+
+syscall_test(test = "//test/syscalls/linux:semaphore_test")
+
+syscall_test(test = "//test/syscalls/linux:sendfile_socket_test")
+
+syscall_test(test = "//test/syscalls/linux:sendfile_test")
+
+syscall_test(test = "//test/syscalls/linux:sigaction_test")
+
+# TODO: Enable once the test passes in runsc.
+# syscall_test(test = "//test/syscalls/linux:sigaltstack_test")
+
+syscall_test(test = "//test/syscalls/linux:sigiret_test")
+
+syscall_test(test = "//test/syscalls/linux:sigprocmask_test")
+
+syscall_test(
+ size = "medium",
+ test = "//test/syscalls/linux:sigstop_test",
+)
+
+syscall_test(test = "//test/syscalls/linux:sigtimedwait_test")
+
+syscall_test(
+ size = "medium",
+ test = "//test/syscalls/linux:shm_test",
+)
+
+syscall_test(
+ size = "medium",
+ test = "//test/syscalls/linux:socket_abstract_non_blocking_test",
+)
+
+syscall_test(
+ size = "enormous",
+ test = "//test/syscalls/linux:socket_abstract_test",
+)
+
+syscall_test(
+ size = "medium",
+ test = "//test/syscalls/linux:socket_domain_non_blocking_test",
+)
+
+syscall_test(
+ size = "enormous",
+ test = "//test/syscalls/linux:socket_domain_test",
+)
+
+syscall_test(
+ size = "medium",
+ test = "//test/syscalls/linux:socket_filesystem_non_blocking_test",
+)
+
+syscall_test(
+ size = "enormous",
+ test = "//test/syscalls/linux:socket_filesystem_test",
+)
+
+syscall_test(
+ size = "medium",
+ test = "//test/syscalls/linux:socket_inet_loopback_test",
+)
+
+syscall_test(
+ size = "large",
+ test = "//test/syscalls/linux:socket_ip_tcp_generic_loopback_test",
+)
+
+syscall_test(
+ size = "medium",
+ test = "//test/syscalls/linux:socket_ip_tcp_loopback_non_blocking_test",
+)
+
+syscall_test(
+ size = "large",
+ test = "//test/syscalls/linux:socket_ip_tcp_loopback_test",
+)
+
+syscall_test(
+ size = "medium",
+ test = "//test/syscalls/linux:socket_ip_tcp_udp_generic_loopback_test",
+)
+
+syscall_test(
+ size = "medium",
+ test = "//test/syscalls/linux:socket_ip_udp_loopback_non_blocking_test",
+)
+
+syscall_test(
+ size = "large",
+ test = "//test/syscalls/linux:socket_ip_udp_loopback_test",
+)
+
+syscall_test(test = "//test/syscalls/linux:socket_netdevice_test")
+
+syscall_test(test = "//test/syscalls/linux:socket_netlink_route_test")
+
+syscall_test(test = "//test/syscalls/linux:socket_non_stream_blocking_local_test")
+
+syscall_test(test = "//test/syscalls/linux:socket_non_stream_blocking_udp_test")
+
+syscall_test(
+ size = "large",
+ test = "//test/syscalls/linux:socket_stream_blocking_local_test",
+)
+
+syscall_test(
+ size = "large",
+ test = "//test/syscalls/linux:socket_stream_blocking_tcp_test",
+)
+
+syscall_test(
+ size = "medium",
+ test = "//test/syscalls/linux:socket_stream_local_test",
+)
+
+syscall_test(
+ size = "medium",
+ test = "//test/syscalls/linux:socket_stream_nonblock_local_test",
+)
+
+syscall_test(
+ size = "enormous",
+ test = "//test/syscalls/linux:socket_unix_abstract_test",
+)
+
+syscall_test(
+ # NOTE: Large sendmsg may stall a long time.
+ size = "enormous",
+ test = "//test/syscalls/linux:socket_unix_dgram_local_test",
+)
+
+syscall_test(
+ size = "medium",
+ test = "//test/syscalls/linux:socket_unix_dgram_non_blocking_test",
+)
+
+syscall_test(
+ size = "enormous",
+ test = "//test/syscalls/linux:socket_unix_filesystem_test",
+)
+
+syscall_test(
+ size = "enormous",
+ test = "//test/syscalls/linux:socket_unix_pair_test",
+)
+
+syscall_test(
+ # NOTE: Large sendmsg may stall a long time.
+ size = "enormous",
+ test = "//test/syscalls/linux:socket_unix_seqpacket_local_test",
+)
+
+syscall_test(
+ size = "medium",
+ test = "//test/syscalls/linux:socket_unix_stream_test",
+)
+
+syscall_test(
+ size = "medium",
+ test = "//test/syscalls/linux:socket_unix_unbound_abstract_test",
+)
+
+syscall_test(
+ size = "medium",
+ test = "//test/syscalls/linux:socket_unix_unbound_dgram_test",
+)
+
+syscall_test(test = "//test/syscalls/linux:socket_unix_unbound_filesystem_test")
+
+syscall_test(
+ size = "medium",
+ test = "//test/syscalls/linux:socket_unix_unbound_seqpacket_test",
+)
+
+syscall_test(
+ size = "large",
+ test = "//test/syscalls/linux:socket_unix_unbound_stream_test",
+)
+
+syscall_test(test = "//test/syscalls/linux:statfs_test")
+
+syscall_test(test = "//test/syscalls/linux:stat_test")
+
+syscall_test(test = "//test/syscalls/linux:stat_times_test")
+
+syscall_test(test = "//test/syscalls/linux:sticky_test")
+
+syscall_test(test = "//test/syscalls/linux:symlink_test")
+
+syscall_test(test = "//test/syscalls/linux:sync_test")
+
+syscall_test(test = "//test/syscalls/linux:sync_file_range_test")
+
+syscall_test(test = "//test/syscalls/linux:sysinfo_test")
+
+syscall_test(test = "//test/syscalls/linux:syslog_test")
+
+syscall_test(test = "//test/syscalls/linux:sysret_test")
+
+syscall_test(
+ size = "medium",
+ test = "//test/syscalls/linux:tcp_socket_test",
+)
+
+syscall_test(test = "//test/syscalls/linux:tgkill_test")
+
+syscall_test(test = "//test/syscalls/linux:timerfd_test")
+
+syscall_test(test = "//test/syscalls/linux:timers_test")
+
+syscall_test(test = "//test/syscalls/linux:time_test")
+
+syscall_test(test = "//test/syscalls/linux:tkill_test")
+
+syscall_test(test = "//test/syscalls/linux:truncate_test")
+
+syscall_test(test = "//test/syscalls/linux:udp_bind_test")
+
+syscall_test(
+ size = "medium",
+ test = "//test/syscalls/linux:udp_socket_test",
+)
+
+syscall_test(test = "//test/syscalls/linux:uidgid_test")
+
+syscall_test(test = "//test/syscalls/linux:uname_test")
+
+syscall_test(test = "//test/syscalls/linux:unlink_test")
+
+syscall_test(test = "//test/syscalls/linux:unshare_test")
+
+syscall_test(test = "//test/syscalls/linux:utimes_test")
+
+syscall_test(
+ size = "medium",
+ test = "//test/syscalls/linux:vdso_clock_gettime_test",
+)
+
+syscall_test(test = "//test/syscalls/linux:vdso_test")
+
+syscall_test(test = "//test/syscalls/linux:vsyscall_test")
+
+syscall_test(test = "//test/syscalls/linux:vfork_test")
+
+syscall_test(
+ size = "medium",
+ test = "//test/syscalls/linux:wait_test",
+)
+
+syscall_test(test = "//test/syscalls/linux:write_test")
+
+go_test(
+ name = "syscall_test",
+ srcs = ["syscall_test.go"],
+ data = [
+ "//runsc",
+ ],
+ # Running this test by itself does not make sense. It should only be run
+ # via the syscall_test macro.
+ tags = [
+ "manual",
+ ],
+ deps = [
+ "//pkg/log",
+ "//runsc/boot",
+ "//runsc/container",
+ "//runsc/specutils",
+ "//runsc/test/testutil",
+ "//test/syscalls/gtest",
+ "@org_golang_x_sys//unix:go_default_library",
+ ],
+)
diff --git a/test/syscalls/README.md b/test/syscalls/README.md
new file mode 100644
index 000000000..69712663c
--- /dev/null
+++ b/test/syscalls/README.md
@@ -0,0 +1,110 @@
+# gVisor system call test suite
+
+This is a test suite for Linux system calls. It runs under both gVisor and
+Linux, and ensures compatability between the two.
+
+When adding support for a new syscall (or syscall argument) to gVisor, a
+corresponding syscall test should be added. It's usually recommended to write
+the test first and make sure that it passes on Linux before making changes to
+gVisor.
+
+This document outlines the general guidelines for tests and specific rules that
+must be followed for new tests.
+
+## Running the tests
+
+Each test file generates three different test targets that run in different
+environments:
+
+* a `native` target that runs directly on the host machine,
+* a `runsc_ptrace` target that runs inside runsc using the ptrace platform, and
+* a `runsc_kvm` target that runs inside runsc using the KVM platform.
+
+For example, the test in `access_test.cc` generates the following targets:
+
+* `//test/syscalls:access_test_native`
+* `//test/syscalls:access_test_runsc_ptrace`
+* `//test/syscalls:access_test_runsc_kvm`
+
+Any of these targets can be run directly via `bazel test`.
+
+```bash
+$ bazel test //test/syscalls:access_test_native
+$ bazel test //test/syscalls:access_test_runsc_ptrace
+$ bazel test //test/syscalls:access_test_runsc_kvm
+```
+
+To run all the tests on a particular platform, you can filter by the platform
+tag:
+
+```bash
+# Run all tests in native environment:
+$ bazel test --test_tag_filter=native //test/syscalls:*
+
+# Run all tests in runsc with ptrace:
+$ bazel test --test_tag_filter=runsc_ptrace //test/syscalls:*
+
+# Run all tests in runsc with kvm:
+$ bazel test --test_tag_filter=runsc_kvm //test/syscalls:*
+```
+
+You can also run all the tests on every platform. (Warning, this may take a
+while to run.)
+
+```bash
+# Run all tests on every platform:
+$ bazel test //test/syscalls:*
+```
+
+## Writing new tests
+
+Whenever we add support for a new syscall, or add support for a new argument or
+option for a syscall, we should always add a new test (perhaps many new tests).
+
+In general, it is best to write the test first and make sure it passes on Linux
+by running the test on the `native` platform on a Linux machine. This ensures
+that the gVisor implementation matches actual Linux behavior. Sometimes man
+pages contain errors, so always check the actual Linux behavior.
+
+gVisor uses the [Google Test][googletest] test framework, with a few custom
+matchers and guidelines, described below.
+
+### Syscall matchers
+
+When testing an individual system call, use the following syscall matchers,
+which will match the value returned by the syscall and the errno.
+
+```cc
+SyscallSucceeds()
+SyscallSucceedsWithValue(...)
+SyscallFails()
+SyscallFailsWithErrno(...)
+```
+
+### Use test utilities (RAII classes)
+
+The test utilties are written as RAII classes. These utilities should be
+preferred over custom test harnesses.
+
+Local class instances should be preferred, whereever possible, over full test
+fixtures.
+
+A test utility should be created when there is more than one test that requires
+that same functionality, otherwise the class should be test local.
+
+
+## Save/Restore support in tests
+gVisor supports save/restore, and our syscall tests are written in a way to
+enable saving/restoring at certain points. Hence, there are calls to
+`MaybeSave`, and certain tests that should not trigger saves are named with
+`NoSave`.
+
+However, the current open-source test runner does not yet support triggering
+save/restore, so these functions and annotations have no effect on the
+open-source tests.
+
+We plan on extending our open-source test runner to trigger save/restore. Until
+then, these functions and annotations should be ignored.
+
+
+[googletest]: https://github.com/abseil/googletest
diff --git a/test/syscalls/build_defs.bzl b/test/syscalls/build_defs.bzl
new file mode 100644
index 000000000..31b311f63
--- /dev/null
+++ b/test/syscalls/build_defs.bzl
@@ -0,0 +1,54 @@
+"""Defines a rule for syscall test targets."""
+
+# syscall_test is a macro that will create targets to run the given test target
+# on the host (native) and runsc.
+def syscall_test(test, size = "small"):
+ _syscall_test(test, size, "native")
+ _syscall_test(test, size, "kvm")
+ _syscall_test(test, size, "ptrace")
+
+def _syscall_test(test, size, platform):
+ test_name = test.split(":")[1]
+
+ # Prepend "runsc" to non-native platform names.
+ full_platform = platform if platform == "native" else "runsc_" + platform
+
+ # Add the full_platform in a tag to make it easier to run all the tests on
+ # a specific platform.
+ tags = [full_platform]
+
+ # Add tag to prevent the tests from running in a Bazel sandbox.
+ # TODO: Make the tests run without this tag.
+ tags.append("no-sandbox")
+
+ # TODO: KVM tests are tagged "manual" to until the platform is
+ # more stable.
+ if platform == "kvm":
+ tags += ["manual"]
+
+ sh_test(
+ srcs = ["syscall_test_runner.sh"],
+ name = test_name + "_" + full_platform,
+ data = [
+ ":syscall_test",
+ test,
+ ],
+ args = [
+ # First argument is location to syscall_test binary.
+ "$(location :syscall_test)",
+ # Rest of arguments are passed directly to syscall_test binary.
+ "--test-name=" + test_name,
+ "--platform=" + platform,
+ "--debug=false",
+ "--strace=false",
+ "--parallel=true",
+ ],
+ size = size,
+ tags = tags,
+ )
+
+def sh_test(**kwargs):
+ """Wraps the standard sh_test."""
+ native.sh_test(
+ **kwargs
+ )
diff --git a/test/syscalls/gtest/BUILD b/test/syscalls/gtest/BUILD
new file mode 100644
index 000000000..d078fd3d5
--- /dev/null
+++ b/test/syscalls/gtest/BUILD
@@ -0,0 +1,12 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+
+package(licenses = ["notice"]) # Apache 2.0
+
+go_library(
+ name = "gtest",
+ srcs = ["gtest.go"],
+ importpath = "gvisor.googlesource.com/gvisor/test/syscalls/gtest",
+ visibility = [
+ "//test:__subpackages__",
+ ],
+)
diff --git a/test/syscalls/gtest/gtest.go b/test/syscalls/gtest/gtest.go
new file mode 100644
index 000000000..dfe5037cd
--- /dev/null
+++ b/test/syscalls/gtest/gtest.go
@@ -0,0 +1,93 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package gtest contains helpers for running google-test tests from Go.
+package gtest
+
+import (
+ "fmt"
+ "os/exec"
+ "strings"
+)
+
+var (
+ // ListTestFlag is the flag that will list tests in gtest binaries.
+ ListTestFlag = "--gtest_list_tests"
+
+ // FilterTestFlag is the flag that will filter tests in gtest binaries.
+ FilterTestFlag = "--gtest_filter"
+)
+
+// TestCase is a single gtest test case.
+type TestCase struct {
+ // Suite is the suite for this test.
+ Suite string
+
+ // Name is the name of this individual test.
+ Name string
+}
+
+// FullName returns the name of the test including the suite. It is suitable to
+// pass to "-gtest_filter".
+func (tc TestCase) FullName() string {
+ return fmt.Sprintf("%s.%s", tc.Suite, tc.Name)
+}
+
+// ParseTestCases calls a gtest test binary to list its test and returns a
+// slice with the name and suite of each test.
+func ParseTestCases(testBin string, extraArgs ...string) ([]TestCase, error) {
+ args := append([]string{ListTestFlag}, extraArgs...)
+ cmd := exec.Command(testBin, args...)
+ out, err := cmd.Output()
+ if err != nil {
+ exitErr, ok := err.(*exec.ExitError)
+ if !ok {
+ return nil, fmt.Errorf("could not enumerate gtest tests: %v", err)
+ }
+ return nil, fmt.Errorf("could not enumerate gtest tests: %v\nstderr:\n%s", err, exitErr.Stderr)
+ }
+
+ var t []TestCase
+ var suite string
+ for _, line := range strings.Split(string(out), "\n") {
+ // Strip comments.
+ line = strings.Split(line, "#")[0]
+
+ // New suite?
+ if !strings.HasPrefix(line, " ") {
+ suite = strings.TrimSuffix(strings.TrimSpace(line), ".")
+ continue
+ }
+
+ // Individual test.
+ name := strings.TrimSpace(line)
+
+ // Do we have a suite yet?
+ if suite == "" {
+ return nil, fmt.Errorf("test without a suite: %v", name)
+ }
+
+ // Add this individual test.
+ t = append(t, TestCase{
+ Suite: suite,
+ Name: name,
+ })
+
+ }
+
+ if len(t) == 0 {
+ return nil, fmt.Errorf("no tests parsed from %v", testBin)
+ }
+ return t, nil
+}
diff --git a/test/syscalls/linux/32bit.cc b/test/syscalls/linux/32bit.cc
new file mode 100644
index 000000000..b8d5f0355
--- /dev/null
+++ b/test/syscalls/linux/32bit.cc
@@ -0,0 +1,226 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string.h>
+#include <sys/mman.h>
+
+#include "test/util/memory_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/test_util.h"
+#include "gtest/gtest.h"
+
+#ifndef __x86_64__
+#error "This test is x86-64 specific."
+#endif
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+constexpr char kInt3 = '\xcc';
+
+constexpr char kInt80[2] = {'\xcd', '\x80'};
+constexpr char kSyscall[2] = {'\x0f', '\x05'};
+constexpr char kSysenter[2] = {'\x0f', '\x34'};
+
+void ExitGroup32(const char instruction[2], int code) {
+ const Mapping m = ASSERT_NO_ERRNO_AND_VALUE(
+ Mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE | PROT_EXEC,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0));
+
+ // Fill with INT 3 in case we execute too far.
+ memset(m.ptr(), kInt3, m.len());
+
+ memcpy(m.ptr(), instruction, 2);
+
+ // We're playing *extremely* fast-and-loose with the various syscall ABIs
+ // here, which we can more-or-less get away with since exit_group doesn't
+ // return.
+ //
+ // SYSENTER expects the user stack in (%ebp) and arg6 in 0(%ebp). The kernel
+ // will unconditionally dereference %ebp for arg6, so we must pass a valid
+ // address or it will return EFAULT.
+ //
+ // SYSENTER also unconditionally returns to thread_info->sysenter_return which
+ // is ostensibly a stub in the 32-bit VDSO. But a 64-bit binary doesn't have
+ // the 32-bit VDSO mapped, so sysenter_return will simply be the value
+ // inherited from the most recent 32-bit ancestor, or NULL if there is none.
+ // As a result, return would not return from SYSENTER.
+ asm volatile(
+ "movl $252, %%eax\n" // exit_group
+ "movl %[code], %%ebx\n" // code
+ "movl %%edx, %%ebp\n" // SYSENTER: user stack (use IP as a valid addr)
+ "leaq -20(%%rsp), %%rsp\n"
+ "movl $0x2b, 16(%%rsp)\n" // SS = CPL3 data segment
+ "movl $0,12(%%rsp)\n" // ESP = nullptr (unused)
+ "movl $0, 8(%%rsp)\n" // EFLAGS
+ "movl $0x23, 4(%%rsp)\n" // CS = CPL3 32-bit code segment
+ "movl %%edx, 0(%%rsp)\n" // EIP
+ "iretl\n"
+ "int $3\n"
+ :
+ : [code] "m"(code), [ip] "d"(m.ptr())
+ : "rax", "rbx", "rsp");
+}
+
+constexpr int kExitCode = 42;
+
+TEST(Syscall32Bit, Int80) {
+ switch (GvisorPlatform()) {
+ case Platform::kKVM:
+ // TODO: 32-bit segments are broken (but not explictly
+ // disabled).
+ return;
+ case Platform::kPtrace:
+ // TODO: The ptrace platform does not have a consistent story
+ // here.
+ return;
+ case Platform::kNative:
+ break;
+ }
+
+ // Upstream Linux. 32-bit syscalls allowed.
+ EXPECT_EXIT(ExitGroup32(kInt80, kExitCode), ::testing::ExitedWithCode(42),
+ "");
+}
+
+TEST(Syscall32Bit, Sysenter) {
+ switch (GvisorPlatform()) {
+ case Platform::kKVM:
+ // TODO: See above.
+ return;
+ case Platform::kPtrace:
+ // TODO: See above.
+ return;
+ case Platform::kNative:
+ break;
+ }
+
+ if (GetCPUVendor() == CPUVendor::kAMD) {
+ // SYSENTER is an illegal instruction in compatibility mode on AMD.
+ EXPECT_EXIT(ExitGroup32(kSysenter, kExitCode),
+ ::testing::KilledBySignal(SIGILL), "");
+ return;
+ }
+
+ // Upstream Linux on !AMD, 32-bit syscalls allowed.
+ EXPECT_EXIT(ExitGroup32(kSysenter, kExitCode), ::testing::ExitedWithCode(42),
+ "");
+}
+
+TEST(Syscall32Bit, Syscall) {
+ switch (GvisorPlatform()) {
+ case Platform::kKVM:
+ // TODO: See above.
+ return;
+ case Platform::kPtrace:
+ // TODO: See above.
+ return;
+ case Platform::kNative:
+ break;
+ }
+
+ if (GetCPUVendor() == CPUVendor::kIntel) {
+ // SYSCALL is an illegal instruction in compatibility mode on Intel.
+ EXPECT_EXIT(ExitGroup32(kSyscall, kExitCode),
+ ::testing::KilledBySignal(SIGILL), "");
+ return;
+ }
+
+ // Upstream Linux on !Intel, 32-bit syscalls allowed.
+ EXPECT_EXIT(ExitGroup32(kSyscall, kExitCode), ::testing::ExitedWithCode(42),
+ "");
+}
+
+// Far call code called below.
+//
+// Input stack layout:
+//
+// %esp+12 lcall segment
+// %esp+8 lcall address offset
+// %esp+0 return address
+//
+// The lcall will enter compatibility mode and jump to the call address (the
+// address of the lret). The lret will return to 64-bit mode at the retq, which
+// will return to the external caller of this function.
+//
+// Since this enters compatibility mode, it must be mapped in a 32-bit region of
+// address space and have a 32-bit stack pointer.
+constexpr char kFarCall[] = {
+ '\x67', '\xff', '\x5c', '\x24', '\x08', // lcall *8(%esp)
+ '\xc3', // retq
+ '\xcb', // lret
+};
+
+void FarCall32() {
+ const Mapping m = ASSERT_NO_ERRNO_AND_VALUE(
+ Mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE | PROT_EXEC,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0));
+
+ // Fill with INT 3 in case we execute too far.
+ memset(m.ptr(), kInt3, m.len());
+
+ // 32-bit code.
+ memcpy(m.ptr(), kFarCall, sizeof(kFarCall));
+
+ // Use the end of the code page as its stack.
+ uintptr_t stack = m.endaddr();
+
+ uintptr_t lcall = m.addr();
+ uintptr_t lret = m.addr() + sizeof(kFarCall) - 1;
+
+ // N.B. We must save and restore RSP manually. GCC can do so automatically
+ // with an "rsp" clobber, but clang cannot.
+ asm volatile(
+ // Place the address of lret (%edx) and the 32-bit code segment (0x23) on
+ // the 32-bit stack for lcall.
+ "subl $0x8, %%ecx\n"
+ "movl $0x23, 4(%%ecx)\n"
+ "movl %%edx, 0(%%ecx)\n"
+
+ // Save the current stack and switch to 32-bit stack.
+ "pushq %%rbp\n"
+ "movq %%rsp, %%rbp\n"
+ "movq %%rcx, %%rsp\n"
+
+ // Run the lcall code.
+ "callq *%%rbx\n"
+
+ // Restore the old stack.
+ "leaveq\n"
+ : "+c"(stack)
+ : "b"(lcall), "d"(lret));
+}
+
+TEST(Call32Bit, Disallowed) {
+ switch (GvisorPlatform()) {
+ case Platform::kKVM:
+ // TODO: See above.
+ return;
+ case Platform::kPtrace:
+ // The ptrace platform cannot prevent switching to compatibility mode.
+ ABSL_FALLTHROUGH_INTENDED;
+ case Platform::kNative:
+ break;
+ }
+
+ // Shouldn't crash.
+ FarCall32();
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD
new file mode 100644
index 000000000..1c48a2a43
--- /dev/null
+++ b/test/syscalls/linux/BUILD
@@ -0,0 +1,2951 @@
+package(
+ default_visibility = ["//:sandbox"],
+ licenses = ["notice"], # Apache 2.0
+)
+
+cc_binary(
+ name = "sigaltstack_check",
+ testonly = 1,
+ srcs = ["sigaltstack_check.cc"],
+ deps = ["//test/util:logging"],
+)
+
+cc_binary(
+ name = "exec_assert_closed_workload",
+ testonly = 1,
+ srcs = ["exec_assert_closed_workload.cc"],
+ deps = [
+ "@com_google_absl//absl/strings",
+ ],
+)
+
+cc_binary(
+ name = "exec_basic_workload",
+ testonly = 1,
+ srcs = [
+ "exec.h",
+ "exec_basic_workload.cc",
+ ],
+)
+
+cc_binary(
+ name = "exec_proc_exe_workload",
+ testonly = 1,
+ srcs = ["exec_proc_exe_workload.cc"],
+ deps = [
+ "//test/util:fs_util",
+ "//test/util:posix_error",
+ ],
+)
+
+cc_binary(
+ name = "exec_state_workload",
+ testonly = 1,
+ srcs = ["exec_state_workload.cc"],
+)
+
+sh_binary(
+ name = "exit_script",
+ testonly = 1,
+ srcs = [
+ "exit_script.sh",
+ ],
+)
+
+cc_binary(
+ name = "priority_execve",
+ testonly = 1,
+ srcs = [
+ "priority_execve.cc",
+ ],
+)
+
+cc_library(
+ name = "base_poll_test",
+ testonly = 1,
+ srcs = ["base_poll_test.cc"],
+ hdrs = ["base_poll_test.h"],
+ deps = [
+ "//test/util:logging",
+ "//test/util:signal_util",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "@com_google_absl//absl/memory",
+ "@com_google_absl//absl/synchronization",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_library(
+ name = "file_base",
+ testonly = 1,
+ hdrs = ["file_base.h"],
+ deps = [
+ "//test/util:file_descriptor",
+ "//test/util:posix_error",
+ "//test/util:temp_path",
+ "//test/util:test_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_library(
+ name = "socket_netlink_util",
+ testonly = 1,
+ srcs = ["socket_netlink_util.cc"],
+ hdrs = ["socket_netlink_util.h"],
+ deps = [
+ ":socket_test_util",
+ "//test/util:file_descriptor",
+ "//test/util:posix_error",
+ "@com_google_absl//absl/strings",
+ ],
+)
+
+cc_library(
+ name = "socket_test_util",
+ testonly = 1,
+ srcs = ["socket_test_util.cc"],
+ hdrs = ["socket_test_util.h"],
+ deps = [
+ "//test/util:file_descriptor",
+ "//test/util:posix_error",
+ "//test/util:temp_path",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "@com_google_absl//absl/memory",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/strings:str_format",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_library(
+ name = "temp_umask",
+ hdrs = ["temp_umask.h"],
+)
+
+cc_library(
+ name = "unix_domain_socket_test_util",
+ testonly = 1,
+ srcs = ["unix_domain_socket_test_util.cc"],
+ hdrs = ["unix_domain_socket_test_util.h"],
+ deps = [
+ ":socket_test_util",
+ "//test/util:test_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_library(
+ name = "ip_socket_test_util",
+ testonly = 1,
+ srcs = ["ip_socket_test_util.cc"],
+ hdrs = ["ip_socket_test_util.h"],
+ deps = [
+ ":socket_test_util",
+ ],
+)
+
+cc_binary(
+ name = "clock_nanosleep_test",
+ testonly = 1,
+ srcs = ["clock_nanosleep.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:cleanup",
+ "//test/util:posix_error",
+ "//test/util:signal_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "//test/util:timer_util",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "32bit_test",
+ testonly = 1,
+ srcs = ["32bit.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:memory_util",
+ "//test/util:posix_error",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "accept_bind_test",
+ testonly = 1,
+ srcs = ["accept_bind.cc"],
+ linkstatic = 1,
+ deps = [
+ ":socket_test_util",
+ ":unix_domain_socket_test_util",
+ "//test/util:file_descriptor",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "accept_bind_stream_test",
+ testonly = 1,
+ srcs = ["accept_bind_stream.cc"],
+ linkstatic = 1,
+ deps = [
+ ":socket_test_util",
+ ":unix_domain_socket_test_util",
+ "//test/util:file_descriptor",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "access_test",
+ testonly = 1,
+ srcs = ["access.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:capability_util",
+ "//test/util:fs_util",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "affinity_test",
+ testonly = 1,
+ srcs = ["affinity.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:cleanup",
+ "//test/util:fs_util",
+ "//test/util:posix_error",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "aio_test",
+ testonly = 1,
+ srcs = [
+ "aio.cc",
+ "file_base.h",
+ ],
+ linkstatic = 1,
+ deps = [
+ "//test/util:cleanup",
+ "//test/util:file_descriptor",
+ "//test/util:posix_error",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "alarm_test",
+ testonly = 1,
+ srcs = ["alarm.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:file_descriptor",
+ "//test/util:logging",
+ "//test/util:signal_util",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "bad_test",
+ testonly = 1,
+ srcs = ["bad.cc"],
+ linkstatic = 1,
+ visibility = [
+ "//:sandbox",
+ ],
+ deps = [
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "bind_test",
+ testonly = 1,
+ srcs = ["bind.cc"],
+ linkstatic = 1,
+ deps = [
+ ":socket_test_util",
+ ":unix_domain_socket_test_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "brk_test",
+ testonly = 1,
+ srcs = ["brk.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:test_main",
+ "//test/util:test_util",
+ ],
+)
+
+cc_binary(
+ name = "chdir_test",
+ testonly = 1,
+ srcs = ["chdir.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:capability_util",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "chmod_test",
+ testonly = 1,
+ srcs = ["chmod.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:capability_util",
+ "//test/util:file_descriptor",
+ "//test/util:fs_util",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "chown_test",
+ testonly = 1,
+ srcs = ["chown.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:capability_util",
+ "//test/util:file_descriptor",
+ "//test/util:fs_util",
+ "//test/util:posix_error",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "@com_google_absl//absl/synchronization",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "sticky_test",
+ testonly = 1,
+ srcs = ["sticky.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:capability_util",
+ "//test/util:file_descriptor",
+ "//test/util:fs_util",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "chroot_test",
+ testonly = 1,
+ srcs = ["chroot.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:capability_util",
+ "//test/util:cleanup",
+ "//test/util:file_descriptor",
+ "//test/util:fs_util",
+ "//test/util:mount_util",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "clock_getres_test",
+ testonly = 1,
+ srcs = ["clock_getres.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "clock_gettime_test",
+ testonly = 1,
+ srcs = ["clock_gettime.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "concurrency_test",
+ testonly = 1,
+ srcs = ["concurrency.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "creat_test",
+ testonly = 1,
+ srcs = ["creat.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:fs_util",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "dev_test",
+ testonly = 1,
+ srcs = ["dev.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:file_descriptor",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "dup_test",
+ testonly = 1,
+ srcs = ["dup.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:file_descriptor",
+ "//test/util:posix_error",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "epoll_test",
+ testonly = 1,
+ srcs = ["epoll.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:file_descriptor",
+ "//test/util:posix_error",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "eventfd_test",
+ testonly = 1,
+ srcs = ["eventfd.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "exceptions_test",
+ testonly = 1,
+ srcs = ["exceptions.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:logging",
+ "//test/util:signal_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "getcpu_test",
+ testonly = 1,
+ srcs = ["getcpu.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "getcpu_host_test",
+ testonly = 1,
+ srcs = ["getcpu.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "getrusage_test",
+ testonly = 1,
+ srcs = ["getrusage.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:logging",
+ "//test/util:memory_util",
+ "//test/util:signal_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "exec_binary_test",
+ testonly = 1,
+ srcs = ["exec_binary.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:cleanup",
+ "//test/util:file_descriptor",
+ "//test/util:fs_util",
+ "//test/util:multiprocess_util",
+ "//test/util:posix_error",
+ "//test/util:proc_util",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "exec_test",
+ testonly = 1,
+ srcs = [
+ "exec.cc",
+ "exec.h",
+ ],
+ data = [
+ ":exec_assert_closed_workload",
+ ":exec_basic_workload",
+ ":exec_proc_exe_workload",
+ ":exec_state_workload",
+ ":exit_script",
+ ":priority_execve",
+ ],
+ linkstatic = 1,
+ deps = [
+ "//test/util:file_descriptor",
+ "//test/util:fs_util",
+ "//test/util:multiprocess_util",
+ "//test/util:posix_error",
+ "//test/util:temp_path",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/synchronization",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "exit_test",
+ testonly = 1,
+ srcs = ["exit.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:file_descriptor",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "fallocate_test",
+ testonly = 1,
+ srcs = ["fallocate.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:file_descriptor",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "fault_test",
+ testonly = 1,
+ srcs = ["fault.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "fchdir_test",
+ testonly = 1,
+ srcs = ["fchdir.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:capability_util",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "fcntl_test",
+ testonly = 1,
+ srcs = ["fcntl.cc"],
+ linkstatic = 1,
+ deps = [
+ ":socket_test_util",
+ "//test/util:cleanup",
+ "//test/util:multiprocess_util",
+ "//test/util:posix_error",
+ "//test/util:temp_path",
+ "//test/util:test_util",
+ "//test/util:timer_util",
+ "@com_google_absl//absl/base:core_headers",
+ "@com_google_absl//absl/memory",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "flock_test",
+ testonly = 1,
+ srcs = [
+ "file_base.h",
+ "flock.cc",
+ ],
+ linkstatic = 1,
+ deps = [
+ "//test/util:file_descriptor",
+ "//test/util:posix_error",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "//test/util:timer_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "fork_test",
+ testonly = 1,
+ srcs = ["fork.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:logging",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "fpsig_fork_test",
+ testonly = 1,
+ srcs = ["fpsig_fork.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:logging",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "fpsig_nested_test",
+ testonly = 1,
+ srcs = ["fpsig_nested.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "sync_file_range_test",
+ testonly = 1,
+ srcs = ["sync_file_range.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:file_descriptor",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "fsync_test",
+ testonly = 1,
+ srcs = ["fsync.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "futex_test",
+ testonly = 1,
+ srcs = ["futex.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:cleanup",
+ "//test/util:file_descriptor",
+ "//test/util:memory_util",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "//test/util:timer_util",
+ "@com_google_absl//absl/memory",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "getdents_test",
+ testonly = 1,
+ srcs = ["getdents.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:file_descriptor",
+ "//test/util:fs_util",
+ "//test/util:posix_error",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "getrandom_test",
+ testonly = 1,
+ srcs = ["getrandom.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "inotify_test",
+ testonly = 1,
+ srcs = ["inotify.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:file_descriptor",
+ "//test/util:fs_util",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/strings:str_format",
+ ],
+)
+
+cc_binary(
+ name = "ioctl_test",
+ testonly = 1,
+ srcs = ["ioctl.cc"],
+ linkstatic = 1,
+ deps = [
+ ":ip_socket_test_util",
+ ":socket_test_util",
+ ":unix_domain_socket_test_util",
+ "//test/util:file_descriptor",
+ "//test/util:signal_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "itimer_test",
+ testonly = 1,
+ srcs = ["itimer.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:file_descriptor",
+ "//test/util:logging",
+ "//test/util:multiprocess_util",
+ "//test/util:posix_error",
+ "//test/util:signal_util",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "//test/util:timer_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "kill_test",
+ testonly = 1,
+ srcs = ["kill.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:capability_util",
+ "//test/util:file_descriptor",
+ "//test/util:logging",
+ "//test/util:signal_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "@com_google_absl//absl/synchronization",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "link_test",
+ testonly = 1,
+ srcs = ["link.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:capability_util",
+ "//test/util:file_descriptor",
+ "//test/util:fs_util",
+ "//test/util:posix_error",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "lseek_test",
+ testonly = 1,
+ srcs = ["lseek.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:file_descriptor",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "madvise_test",
+ testonly = 1,
+ srcs = ["madvise.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:file_descriptor",
+ "//test/util:logging",
+ "//test/util:memory_util",
+ "//test/util:posix_error",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "mempolicy_test",
+ testonly = 1,
+ srcs = ["mempolicy.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:cleanup",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "@com_google_absl//absl/memory",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "mincore_test",
+ testonly = 1,
+ srcs = ["mincore.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:memory_util",
+ "//test/util:posix_error",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "mkdir_test",
+ testonly = 1,
+ srcs = ["mkdir.cc"],
+ linkstatic = 1,
+ deps = [
+ ":temp_umask",
+ "//test/util:capability_util",
+ "//test/util:fs_util",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "mknod_test",
+ testonly = 1,
+ srcs = ["mknod.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "mmap_test",
+ testonly = 1,
+ srcs = ["mmap.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:cleanup",
+ "//test/util:file_descriptor",
+ "//test/util:fs_util",
+ "//test/util:memory_util",
+ "//test/util:multiprocess_util",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "mount_test",
+ testonly = 1,
+ srcs = ["mount.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:capability_util",
+ "//test/util:file_descriptor",
+ "//test/util:fs_util",
+ "//test/util:mount_util",
+ "//test/util:posix_error",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "mremap_test",
+ testonly = 1,
+ srcs = ["mremap.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:file_descriptor",
+ "//test/util:logging",
+ "//test/util:memory_util",
+ "//test/util:multiprocess_util",
+ "//test/util:posix_error",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "msync_test",
+ testonly = 1,
+ srcs = ["msync.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:file_descriptor",
+ "//test/util:memory_util",
+ "//test/util:posix_error",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ ],
+)
+
+cc_binary(
+ name = "munmap_test",
+ testonly = 1,
+ srcs = ["munmap.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "open_test",
+ testonly = 1,
+ srcs = [
+ "file_base.h",
+ "open.cc",
+ ],
+ linkstatic = 1,
+ deps = [
+ "//test/util:capability_util",
+ "//test/util:cleanup",
+ "//test/util:file_descriptor",
+ "//test/util:fs_util",
+ "//test/util:posix_error",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "open_create_test",
+ testonly = 1,
+ srcs = ["open_create.cc"],
+ linkstatic = 1,
+ deps = [
+ ":temp_umask",
+ "//test/util:capability_util",
+ "//test/util:file_descriptor",
+ "//test/util:fs_util",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "pty_test",
+ testonly = 1,
+ srcs = ["pty.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:file_descriptor",
+ "//test/util:posix_error",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "@com_google_absl//absl/base:core_headers",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/synchronization",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "partial_bad_buffer_test",
+ testonly = 1,
+ srcs = ["partial_bad_buffer.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:fs_util",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "pause_test",
+ testonly = 1,
+ srcs = ["pause.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:signal_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "@com_google_absl//absl/synchronization",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "pipe_test",
+ testonly = 1,
+ srcs = ["pipe.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:file_descriptor",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/synchronization",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "poll_test",
+ testonly = 1,
+ srcs = ["poll.cc"],
+ linkstatic = 1,
+ deps = [
+ ":base_poll_test",
+ "//test/util:file_descriptor",
+ "//test/util:logging",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "@com_google_absl//absl/synchronization",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "ppoll_test",
+ testonly = 1,
+ srcs = ["ppoll.cc"],
+ linkstatic = 1,
+ deps = [
+ ":base_poll_test",
+ "//test/util:signal_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "arch_prctl_test",
+ testonly = 1,
+ srcs = ["arch_prctl.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "prctl_test",
+ testonly = 1,
+ srcs = ["prctl.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:capability_util",
+ "//test/util:multiprocess_util",
+ "//test/util:posix_error",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "prctl_setuid_test",
+ testonly = 1,
+ srcs = ["prctl_setuid.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:capability_util",
+ "//test/util:logging",
+ "//test/util:multiprocess_util",
+ "//test/util:posix_error",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "pread64_test",
+ testonly = 1,
+ srcs = ["pread64.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:file_descriptor",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "preadv_test",
+ testonly = 1,
+ srcs = ["preadv.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:file_descriptor",
+ "//test/util:logging",
+ "//test/util:memory_util",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "//test/util:timer_util",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "preadv2_test",
+ testonly = 1,
+ srcs = [
+ "preadv2.cc",
+ "readv_common.cc",
+ "readv_common.h",
+ ],
+ linkstatic = 1,
+ deps = [
+ ":file_base",
+ "//test/util:file_descriptor",
+ "//test/util:memory_util",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "priority_test",
+ testonly = 1,
+ srcs = ["priority.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:capability_util",
+ "//test/util:fs_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "proc_test",
+ testonly = 1,
+ srcs = ["proc.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:capability_util",
+ "//test/util:cleanup",
+ "//test/util:file_descriptor",
+ "//test/util:fs_util",
+ "//test/util:memory_util",
+ "//test/util:posix_error",
+ "//test/util:temp_path",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "//test/util:timer_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/synchronization",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "proc_net_test",
+ testonly = 1,
+ srcs = ["proc_net.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:file_descriptor",
+ "//test/util:fs_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "pselect_test",
+ testonly = 1,
+ srcs = ["pselect.cc"],
+ linkstatic = 1,
+ deps = [
+ ":base_poll_test",
+ "//test/util:signal_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "ptrace_test",
+ testonly = 1,
+ srcs = ["ptrace.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:logging",
+ "//test/util:multiprocess_util",
+ "//test/util:signal_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "pwrite64_test",
+ testonly = 1,
+ srcs = ["pwrite64.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "read_test",
+ testonly = 1,
+ srcs = ["read.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:file_descriptor",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "readv_test",
+ testonly = 1,
+ srcs = [
+ "file_base.h",
+ "readv.cc",
+ "readv_common.cc",
+ "readv_common.h",
+ ],
+ linkstatic = 1,
+ deps = [
+ "//test/util:file_descriptor",
+ "//test/util:posix_error",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "//test/util:timer_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "readv_socket_test",
+ testonly = 1,
+ srcs = [
+ "file_base.h",
+ "readv_common.cc",
+ "readv_common.h",
+ "readv_socket.cc",
+ ],
+ linkstatic = 1,
+ deps = [
+ "//test/util:file_descriptor",
+ "//test/util:posix_error",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "rename_test",
+ testonly = 1,
+ srcs = ["rename.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:capability_util",
+ "//test/util:cleanup",
+ "//test/util:file_descriptor",
+ "//test/util:fs_util",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "rlimits_test",
+ testonly = 1,
+ srcs = ["rlimits.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:capability_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ ],
+)
+
+cc_binary(
+ name = "rtsignal_test",
+ testonly = 1,
+ srcs = ["rtsignal.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:cleanup",
+ "//test/util:logging",
+ "//test/util:posix_error",
+ "//test/util:signal_util",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "sched_test",
+ testonly = 1,
+ srcs = ["sched.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "sched_yield_test",
+ testonly = 1,
+ srcs = ["sched_yield.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "seccomp_test",
+ testonly = 1,
+ srcs = ["seccomp.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:logging",
+ "//test/util:memory_util",
+ "//test/util:multiprocess_util",
+ "//test/util:posix_error",
+ "//test/util:proc_util",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "@com_google_absl//absl/base:core_headers",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "select_test",
+ testonly = 1,
+ srcs = ["select.cc"],
+ linkstatic = 1,
+ deps = [
+ ":base_poll_test",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "sendfile_test",
+ testonly = 1,
+ srcs = ["sendfile.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:file_descriptor",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "sendfile_socket_test",
+ testonly = 1,
+ srcs = ["sendfile_socket.cc"],
+ linkstatic = 1,
+ deps = [
+ ":socket_test_util",
+ "//test/util:file_descriptor",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "sigaction_test",
+ testonly = 1,
+ srcs = ["sigaction.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "sigaltstack_test",
+ testonly = 1,
+ srcs = ["sigaltstack.cc"],
+ data = [
+ ":sigaltstack_check",
+ ],
+ linkstatic = 1,
+ deps = [
+ "//test/util:cleanup",
+ "//test/util:fs_util",
+ "//test/util:multiprocess_util",
+ "//test/util:posix_error",
+ "//test/util:signal_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "sigiret_test",
+ testonly = 1,
+ srcs = ["sigiret.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:logging",
+ "//test/util:signal_util",
+ "//test/util:test_util",
+ "//test/util:timer_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "sigprocmask_test",
+ testonly = 1,
+ srcs = ["sigprocmask.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:signal_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "sigstop_test",
+ testonly = 1,
+ srcs = ["sigstop.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:multiprocess_util",
+ "//test/util:posix_error",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "sigtimedwait_test",
+ testonly = 1,
+ srcs = ["sigtimedwait.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:logging",
+ "//test/util:signal_util",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "//test/util:timer_util",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_library(
+ name = "socket_generic_test_cases",
+ testonly = 1,
+ srcs = [
+ "socket_generic.cc",
+ ],
+ hdrs = [
+ "socket_generic.h",
+ ],
+ deps = [
+ ":socket_test_util",
+ ":unix_domain_socket_test_util",
+ "//test/util:test_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_googletest//:gtest",
+ ],
+ alwayslink = 1,
+)
+
+cc_library(
+ name = "socket_unix_dgram_test_cases",
+ testonly = 1,
+ srcs = ["socket_unix_dgram.cc"],
+ hdrs = ["socket_unix_dgram.h"],
+ deps = [
+ ":socket_test_util",
+ ":unix_domain_socket_test_util",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+ alwayslink = 1,
+)
+
+cc_library(
+ name = "socket_unix_seqpacket_test_cases",
+ testonly = 1,
+ srcs = ["socket_unix_seqpacket.cc"],
+ hdrs = ["socket_unix_seqpacket.h"],
+ deps = [
+ ":socket_test_util",
+ ":unix_domain_socket_test_util",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+ alwayslink = 1,
+)
+
+cc_library(
+ name = "socket_ip_tcp_generic_test_cases",
+ testonly = 1,
+ srcs = [
+ "socket_ip_tcp_generic.cc",
+ ],
+ hdrs = [
+ "socket_ip_tcp_generic.h",
+ ],
+ deps = [
+ ":socket_test_util",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+ alwayslink = 1,
+)
+
+cc_library(
+ name = "socket_non_blocking_test_cases",
+ testonly = 1,
+ srcs = [
+ "socket_non_blocking.cc",
+ ],
+ hdrs = [
+ "socket_non_blocking.h",
+ ],
+ deps = [
+ ":socket_test_util",
+ ":unix_domain_socket_test_util",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+ alwayslink = 1,
+)
+
+cc_library(
+ name = "socket_unix_non_stream_test_cases",
+ testonly = 1,
+ srcs = [
+ "socket_unix_non_stream.cc",
+ ],
+ hdrs = [
+ "socket_unix_non_stream.h",
+ ],
+ deps = [
+ ":socket_test_util",
+ ":unix_domain_socket_test_util",
+ "//test/util:memory_util",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+ alwayslink = 1,
+)
+
+cc_library(
+ name = "socket_non_stream_test_cases",
+ testonly = 1,
+ srcs = [
+ "socket_non_stream.cc",
+ ],
+ hdrs = [
+ "socket_non_stream.h",
+ ],
+ deps = [
+ ":ip_socket_test_util",
+ ":socket_test_util",
+ ":unix_domain_socket_test_util",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+ alwayslink = 1,
+)
+
+cc_binary(
+ name = "socket_abstract_test",
+ testonly = 1,
+ srcs = [
+ "socket_abstract.cc",
+ ],
+ linkstatic = 1,
+ deps = [
+ ":socket_generic_test_cases",
+ ":socket_test_util",
+ ":socket_unix_test_cases",
+ ":unix_domain_socket_test_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ ],
+)
+
+cc_binary(
+ name = "socket_abstract_non_blocking_test",
+ testonly = 1,
+ srcs = [
+ "socket_unix_abstract_nonblock.cc",
+ ],
+ linkstatic = 1,
+ deps = [
+ ":socket_non_blocking_test_cases",
+ ":socket_test_util",
+ ":unix_domain_socket_test_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ ],
+)
+
+cc_binary(
+ name = "socket_unix_dgram_local_test",
+ testonly = 1,
+ srcs = ["socket_unix_dgram_local.cc"],
+ linkstatic = 1,
+ deps = [
+ ":socket_non_stream_test_cases",
+ ":socket_test_util",
+ ":socket_unix_dgram_test_cases",
+ ":socket_unix_non_stream_test_cases",
+ ":unix_domain_socket_test_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ ],
+)
+
+cc_binary(
+ name = "socket_unix_dgram_non_blocking_test",
+ testonly = 1,
+ srcs = ["socket_unix_dgram_non_blocking.cc"],
+ linkstatic = 1,
+ deps = [
+ ":socket_test_util",
+ ":unix_domain_socket_test_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "socket_unix_seqpacket_local_test",
+ testonly = 1,
+ srcs = [
+ "socket_unix_seqpacket_local.cc",
+ ],
+ linkstatic = 1,
+ deps = [
+ ":socket_non_stream_test_cases",
+ ":socket_test_util",
+ ":socket_unix_non_stream_test_cases",
+ ":socket_unix_seqpacket_test_cases",
+ ":unix_domain_socket_test_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ ],
+)
+
+cc_binary(
+ name = "socket_unix_stream_test",
+ testonly = 1,
+ srcs = ["socket_unix_stream.cc"],
+ linkstatic = 1,
+ deps = [
+ ":socket_test_util",
+ ":unix_domain_socket_test_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "socket_ip_tcp_generic_loopback_test",
+ testonly = 1,
+ srcs = [
+ "socket_ip_tcp_generic_loopback.cc",
+ ],
+ linkstatic = 1,
+ deps = [
+ ":ip_socket_test_util",
+ ":socket_ip_tcp_generic_test_cases",
+ ":socket_test_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ ],
+)
+
+cc_binary(
+ name = "socket_ip_tcp_udp_generic_loopback_test",
+ testonly = 1,
+ srcs = [
+ "socket_ip_tcp_udp_generic.cc",
+ ],
+ linkstatic = 1,
+ deps = [
+ ":ip_socket_test_util",
+ ":socket_test_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "socket_ip_tcp_loopback_test",
+ testonly = 1,
+ srcs = [
+ "socket_ip_tcp_loopback.cc",
+ ],
+ linkstatic = 1,
+ deps = [
+ ":ip_socket_test_util",
+ ":socket_generic_test_cases",
+ ":socket_test_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ ],
+)
+
+cc_binary(
+ name = "socket_ip_tcp_loopback_non_blocking_test",
+ testonly = 1,
+ srcs = [
+ "socket_ip_tcp_loopback_nonblock.cc",
+ ],
+ linkstatic = 1,
+ deps = [
+ ":ip_socket_test_util",
+ ":socket_non_blocking_test_cases",
+ ":socket_test_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ ],
+)
+
+cc_binary(
+ name = "socket_ip_udp_loopback_test",
+ testonly = 1,
+ srcs = [
+ "socket_ip_udp_loopback.cc",
+ ],
+ linkstatic = 1,
+ deps = [
+ ":ip_socket_test_util",
+ ":socket_generic_test_cases",
+ ":socket_non_stream_test_cases",
+ ":socket_test_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ ],
+)
+
+cc_binary(
+ name = "socket_ip_udp_loopback_non_blocking_test",
+ testonly = 1,
+ srcs = [
+ "socket_ip_udp_loopback_nonblock.cc",
+ ],
+ linkstatic = 1,
+ deps = [
+ ":ip_socket_test_util",
+ ":socket_non_blocking_test_cases",
+ ":socket_test_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ ],
+)
+
+cc_binary(
+ name = "socket_domain_test",
+ testonly = 1,
+ srcs = [
+ "socket_unix_domain.cc",
+ ],
+ linkstatic = 1,
+ deps = [
+ ":socket_generic_test_cases",
+ ":socket_test_util",
+ ":unix_domain_socket_test_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ ],
+)
+
+cc_binary(
+ name = "socket_domain_non_blocking_test",
+ testonly = 1,
+ srcs = [
+ "socket_unix_pair_nonblock.cc",
+ ],
+ linkstatic = 1,
+ deps = [
+ ":socket_non_blocking_test_cases",
+ ":socket_test_util",
+ ":unix_domain_socket_test_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ ],
+)
+
+cc_binary(
+ name = "socket_filesystem_test",
+ testonly = 1,
+ srcs = [
+ "socket_filesystem.cc",
+ ],
+ linkstatic = 1,
+ deps = [
+ ":socket_generic_test_cases",
+ ":socket_test_util",
+ ":socket_unix_test_cases",
+ ":unix_domain_socket_test_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ ],
+)
+
+cc_binary(
+ name = "socket_filesystem_non_blocking_test",
+ testonly = 1,
+ srcs = [
+ "socket_unix_filesystem_nonblock.cc",
+ ],
+ linkstatic = 1,
+ deps = [
+ ":socket_non_blocking_test_cases",
+ ":socket_test_util",
+ ":unix_domain_socket_test_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ ],
+)
+
+cc_binary(
+ name = "socket_inet_loopback_test",
+ testonly = 1,
+ srcs = ["socket_inet_loopback.cc"],
+ linkstatic = 1,
+ deps = [
+ ":socket_test_util",
+ "//test/util:file_descriptor",
+ "//test/util:posix_error",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "socket_netlink_route_test",
+ testonly = 1,
+ srcs = ["socket_netlink_route.cc"],
+ linkstatic = 1,
+ deps = [
+ ":socket_netlink_util",
+ ":socket_test_util",
+ "//test/util:cleanup",
+ "//test/util:file_descriptor",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+# These socket tests are in a library because the test cases are shared
+# across several test build targets.
+cc_library(
+ name = "socket_stream_test_cases",
+ testonly = 1,
+ srcs = [
+ "socket_stream.cc",
+ ],
+ hdrs = [
+ "socket_stream.h",
+ ],
+ deps = [
+ ":socket_test_util",
+ ":unix_domain_socket_test_util",
+ "//test/util:test_util",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest",
+ ],
+ alwayslink = 1,
+)
+
+cc_library(
+ name = "socket_unix_test_cases",
+ testonly = 1,
+ srcs = [
+ "socket_unix.cc",
+ ],
+ hdrs = [
+ "socket_unix.h",
+ ],
+ deps = [
+ ":socket_test_util",
+ ":unix_domain_socket_test_util",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_googletest//:gtest",
+ ],
+ alwayslink = 1,
+)
+
+cc_library(
+ name = "socket_stream_blocking_test_cases",
+ testonly = 1,
+ srcs = [
+ "socket_stream_blocking.cc",
+ ],
+ hdrs = [
+ "socket_stream_blocking.h",
+ ],
+ deps = [
+ ":socket_test_util",
+ ":unix_domain_socket_test_util",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "//test/util:timer_util",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest",
+ ],
+ alwayslink = 1,
+)
+
+cc_library(
+ name = "socket_stream_nonblocking_test_cases",
+ testonly = 1,
+ srcs = [
+ "socket_stream_nonblock.cc",
+ ],
+ hdrs = [
+ "socket_stream_nonblock.h",
+ ],
+ deps = [
+ ":socket_test_util",
+ ":unix_domain_socket_test_util",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+ alwayslink = 1,
+)
+
+cc_binary(
+ name = "socket_stream_local_test",
+ testonly = 1,
+ srcs = [
+ "socket_unix_stream_local.cc",
+ ],
+ linkstatic = 1,
+ deps = [
+ ":socket_stream_test_cases",
+ ":socket_test_util",
+ ":unix_domain_socket_test_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ ],
+)
+
+cc_binary(
+ name = "socket_stream_blocking_local_test",
+ testonly = 1,
+ srcs = [
+ "socket_unix_stream_blocking_local.cc",
+ ],
+ linkstatic = 1,
+ deps = [
+ ":socket_stream_blocking_test_cases",
+ ":socket_test_util",
+ ":unix_domain_socket_test_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ ],
+)
+
+cc_binary(
+ name = "socket_stream_blocking_tcp_test",
+ testonly = 1,
+ srcs = [
+ "socket_ip_tcp_loopback_blocking.cc",
+ ],
+ linkstatic = 1,
+ deps = [
+ ":ip_socket_test_util",
+ ":socket_stream_blocking_test_cases",
+ ":socket_test_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ ],
+)
+
+cc_binary(
+ name = "socket_stream_nonblock_local_test",
+ testonly = 1,
+ srcs = [
+ "socket_unix_stream_nonblock_local.cc",
+ ],
+ linkstatic = 1,
+ deps = [
+ ":socket_stream_nonblocking_test_cases",
+ ":socket_test_util",
+ ":unix_domain_socket_test_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ ],
+)
+
+cc_binary(
+ name = "socket_unix_abstract_test",
+ testonly = 1,
+ srcs = [
+ "socket_unix_abstract.cc",
+ ],
+ linkstatic = 1,
+ deps = [
+ ":socket_test_util",
+ ":socket_unix_test_cases",
+ ":unix_domain_socket_test_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ ],
+)
+
+cc_binary(
+ name = "socket_unix_unbound_dgram_test",
+ testonly = 1,
+ srcs = ["socket_unix_unbound_dgram.cc"],
+ linkstatic = 1,
+ deps = [
+ ":socket_test_util",
+ ":unix_domain_socket_test_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "socket_unix_unbound_abstract_test",
+ testonly = 1,
+ srcs = ["socket_unix_unbound_abstract.cc"],
+ linkstatic = 1,
+ deps = [
+ ":socket_test_util",
+ ":unix_domain_socket_test_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "socket_unix_unbound_filesystem_test",
+ testonly = 1,
+ srcs = ["socket_unix_unbound_filesystem.cc"],
+ linkstatic = 1,
+ deps = [
+ ":socket_test_util",
+ ":unix_domain_socket_test_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "socket_unix_filesystem_test",
+ testonly = 1,
+ srcs = [
+ "socket_unix_filesystem.cc",
+ ],
+ linkstatic = 1,
+ deps = [
+ ":socket_test_util",
+ ":socket_unix_test_cases",
+ ":unix_domain_socket_test_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_library(
+ name = "socket_non_stream_blocking_test_cases",
+ testonly = 1,
+ srcs = [
+ "socket_non_stream_blocking.cc",
+ ],
+ hdrs = [
+ "socket_non_stream_blocking.h",
+ ],
+ deps = [
+ ":socket_test_util",
+ ":unix_domain_socket_test_util",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "//test/util:timer_util",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest",
+ ],
+ alwayslink = 1,
+)
+
+cc_binary(
+ name = "socket_non_stream_blocking_local_test",
+ testonly = 1,
+ srcs = [
+ "socket_unix_non_stream_blocking_local.cc",
+ ],
+ linkstatic = 1,
+ deps = [
+ ":socket_non_stream_blocking_test_cases",
+ ":socket_test_util",
+ ":unix_domain_socket_test_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "socket_non_stream_blocking_udp_test",
+ testonly = 1,
+ srcs = [
+ "socket_ip_udp_loopback_blocking.cc",
+ ],
+ linkstatic = 1,
+ deps = [
+ ":ip_socket_test_util",
+ ":socket_non_stream_blocking_test_cases",
+ ":socket_test_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "socket_unix_pair_test",
+ testonly = 1,
+ srcs = [
+ "socket_unix_pair.cc",
+ ],
+ linkstatic = 1,
+ deps = [
+ ":socket_test_util",
+ ":socket_unix_test_cases",
+ ":unix_domain_socket_test_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "socket_unix_unbound_seqpacket_test",
+ testonly = 1,
+ srcs = ["socket_unix_unbound_seqpacket.cc"],
+ linkstatic = 1,
+ deps = [
+ ":socket_test_util",
+ ":unix_domain_socket_test_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "socket_unix_unbound_stream_test",
+ testonly = 1,
+ srcs = ["socket_unix_unbound_stream.cc"],
+ linkstatic = 1,
+ deps = [
+ ":socket_test_util",
+ ":unix_domain_socket_test_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "socket_netdevice_test",
+ testonly = 1,
+ srcs = ["socket_netdevice.cc"],
+ linkstatic = 1,
+ deps = [
+ ":socket_netlink_util",
+ ":socket_test_util",
+ "//test/util:file_descriptor",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_absl//absl/base:endian",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "stat_test",
+ testonly = 1,
+ srcs = [
+ "file_base.h",
+ "stat.cc",
+ ],
+ linkstatic = 1,
+ deps = [
+ "//test/util:cleanup",
+ "//test/util:file_descriptor",
+ "//test/util:fs_util",
+ "//test/util:posix_error",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "stat_times_test",
+ testonly = 1,
+ srcs = ["stat_times.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:file_descriptor",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "statfs_test",
+ testonly = 1,
+ srcs = [
+ "file_base.h",
+ "statfs.cc",
+ ],
+ linkstatic = 1,
+ deps = [
+ "//test/util:file_descriptor",
+ "//test/util:posix_error",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "symlink_test",
+ testonly = 1,
+ srcs = ["symlink.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:capability_util",
+ "//test/util:file_descriptor",
+ "//test/util:fs_util",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "sync_test",
+ testonly = 1,
+ srcs = ["sync.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "sysinfo_test",
+ testonly = 1,
+ srcs = ["sysinfo.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "syslog_test",
+ testonly = 1,
+ srcs = ["syslog.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "sysret_test",
+ testonly = 1,
+ srcs = ["sysret.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:logging",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "tcp_socket_test",
+ testonly = 1,
+ srcs = ["tcp_socket.cc"],
+ linkstatic = 1,
+ deps = [
+ ":socket_test_util",
+ "//test/util:file_descriptor",
+ "//test/util:posix_error",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "tgkill_test",
+ testonly = 1,
+ srcs = ["tgkill.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:signal_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "time_test",
+ testonly = 1,
+ srcs = ["time.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:proc_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "timerfd_test",
+ testonly = 1,
+ srcs = ["timerfd.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:file_descriptor",
+ "//test/util:posix_error",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_absl//absl/time",
+ ],
+)
+
+cc_binary(
+ name = "timers_test",
+ testonly = 1,
+ srcs = ["timers.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:cleanup",
+ "//test/util:logging",
+ "//test/util:multiprocess_util",
+ "//test/util:posix_error",
+ "//test/util:signal_util",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "tkill_test",
+ testonly = 1,
+ srcs = ["tkill.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:logging",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "truncate_test",
+ testonly = 1,
+ srcs = ["truncate.cc"],
+ linkstatic = 1,
+ deps = [
+ ":file_base",
+ "//test/util:capability_util",
+ "//test/util:cleanup",
+ "//test/util:file_descriptor",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "udp_socket_test",
+ testonly = 1,
+ srcs = ["udp_socket.cc"],
+ linkstatic = 1,
+ deps = [
+ ":socket_test_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "@com_google_absl//absl/base:core_headers",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "udp_bind_test",
+ testonly = 1,
+ srcs = ["udp_bind.cc"],
+ linkstatic = 1,
+ deps = [
+ ":socket_test_util",
+ "//test/util:file_descriptor",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "uidgid_test",
+ testonly = 1,
+ srcs = ["uidgid.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:capability_util",
+ "//test/util:posix_error",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "uname_test",
+ testonly = 1,
+ srcs = ["uname.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:capability_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "unlink_test",
+ testonly = 1,
+ srcs = ["unlink.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:capability_util",
+ "//test/util:file_descriptor",
+ "//test/util:fs_util",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "unshare_test",
+ testonly = 1,
+ srcs = ["unshare.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "@com_google_absl//absl/synchronization",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "utimes_test",
+ testonly = 1,
+ srcs = ["utimes.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:file_descriptor",
+ "//test/util:fs_util",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_absl//absl/time",
+ ],
+)
+
+cc_binary(
+ name = "vdso_test",
+ testonly = 1,
+ srcs = ["vdso.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:fs_util",
+ "//test/util:posix_error",
+ "//test/util:proc_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "vfork_test",
+ testonly = 1,
+ srcs = ["vfork.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:logging",
+ "//test/util:multiprocess_util",
+ "//test/util:test_util",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "wait_test",
+ testonly = 1,
+ srcs = ["wait.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:cleanup",
+ "//test/util:logging",
+ "//test/util:multiprocess_util",
+ "//test/util:posix_error",
+ "//test/util:signal_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "write_test",
+ testonly = 1,
+ srcs = ["write.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:cleanup",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "memory_accounting_test",
+ testonly = 1,
+ srcs = ["memory_accounting.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:fs_util",
+ "//test/util:posix_error",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/strings:str_format",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "semaphore_test",
+ testonly = 1,
+ srcs = ["semaphore.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:capability_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "//test/util:thread_util",
+ "@com_google_absl//absl/base:core_headers",
+ "@com_google_absl//absl/synchronization",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "shm_test",
+ testonly = 1,
+ srcs = ["shm.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:multiprocess_util",
+ "//test/util:posix_error",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_absl//absl/time",
+ ],
+)
+
+cc_binary(
+ name = "fadvise64_test",
+ testonly = 1,
+ srcs = ["fadvise64.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:file_descriptor",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "vdso_clock_gettime_test",
+ testonly = 1,
+ srcs = ["vdso_clock_gettime.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_binary(
+ name = "vsyscall_test",
+ testonly = 1,
+ srcs = ["vsyscall.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:proc_util",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_googletest//:gtest",
+ ],
+)
diff --git a/test/syscalls/linux/accept_bind.cc b/test/syscalls/linux/accept_bind.cc
new file mode 100644
index 000000000..7c6e92317
--- /dev/null
+++ b/test/syscalls/linux/accept_bind.cc
@@ -0,0 +1,600 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdio.h>
+#include <sys/un.h>
+#include <algorithm>
+#include <vector>
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST_P(AllSocketPairTest, Listen) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ ASSERT_THAT(listen(sockets->first_fd(), /* backlog = */ 5),
+ SyscallSucceeds());
+}
+
+TEST_P(AllSocketPairTest, ListenIncreaseBacklog) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ ASSERT_THAT(listen(sockets->first_fd(), /* backlog = */ 5),
+ SyscallSucceeds());
+ ASSERT_THAT(listen(sockets->first_fd(), /* backlog = */ 10),
+ SyscallSucceeds());
+}
+
+TEST_P(AllSocketPairTest, ListenDecreaseBacklog) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ ASSERT_THAT(listen(sockets->first_fd(), /* backlog = */ 5),
+ SyscallSucceeds());
+ ASSERT_THAT(listen(sockets->first_fd(), /* backlog = */ 1),
+ SyscallSucceeds());
+}
+
+TEST_P(AllSocketPairTest, ListenWithoutBind) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ ASSERT_THAT(listen(sockets->first_fd(), 0), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_P(AllSocketPairTest, DoubleBind) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->second_addr(),
+ sockets->second_addr_size()),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_P(AllSocketPairTest, BindListenBind) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->second_addr(),
+ sockets->second_addr_size()),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_P(AllSocketPairTest, DoubleListen) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+
+ ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+}
+
+TEST_P(AllSocketPairTest, DoubleConnect) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+
+ ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallFailsWithErrno(EISCONN));
+}
+
+TEST_P(AllSocketPairTest, Connect) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+
+ ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+}
+
+TEST_P(AllSocketPairTest, ConnectToFilePath) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ struct sockaddr_un addr = {};
+ addr.sun_family = AF_UNIX;
+ constexpr char kPath[] = "/tmp";
+ memcpy(addr.sun_path, kPath, sizeof(kPath));
+
+ ASSERT_THAT(
+ connect(sockets->second_fd(),
+ reinterpret_cast<const struct sockaddr*>(&addr), sizeof(addr)),
+ SyscallFailsWithErrno(ECONNREFUSED));
+}
+
+TEST_P(AllSocketPairTest, ConnectToInvalidAbstractPath) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ struct sockaddr_un addr = {};
+ addr.sun_family = AF_UNIX;
+ constexpr char kPath[] = "\0nonexistent";
+ memcpy(addr.sun_path, kPath, sizeof(kPath));
+
+ ASSERT_THAT(
+ connect(sockets->second_fd(),
+ reinterpret_cast<const struct sockaddr*>(&addr), sizeof(addr)),
+ SyscallFailsWithErrno(ECONNREFUSED));
+}
+
+TEST_P(AllSocketPairTest, SelfConnect) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+
+ ASSERT_THAT(connect(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_P(AllSocketPairTest, ConnectWithoutListen) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallFailsWithErrno(ECONNREFUSED));
+}
+
+TEST_P(AllSocketPairTest, Accept) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+
+ ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ int accepted = -1;
+ ASSERT_THAT(accepted = accept(sockets->first_fd(), nullptr, nullptr),
+ SyscallSucceeds());
+ ASSERT_THAT(close(accepted), SyscallSucceeds());
+}
+
+TEST_P(AllSocketPairTest, AcceptValidAddrLen) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+
+ ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ int accepted = -1;
+ struct sockaddr_un addr = {};
+ socklen_t addr_len = sizeof(addr);
+ ASSERT_THAT(
+ accepted = accept(sockets->first_fd(),
+ reinterpret_cast<struct sockaddr*>(&addr), &addr_len),
+ SyscallSucceeds());
+ ASSERT_THAT(close(accepted), SyscallSucceeds());
+}
+
+TEST_P(AllSocketPairTest, AcceptNegativeAddrLen) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+
+ ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ // With a negative addr_len, accept returns EINVAL,
+ struct sockaddr_un addr = {};
+ socklen_t addr_len = -1;
+ ASSERT_THAT(accept(sockets->first_fd(),
+ reinterpret_cast<struct sockaddr*>(&addr), &addr_len),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_P(AllSocketPairTest, AcceptLargePositiveAddrLen) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+
+ ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ // With a large (positive) addr_len, accept does not return EINVAL.
+ int accepted = -1;
+ char addr_buf[200];
+ socklen_t addr_len = sizeof(addr_buf);
+ ASSERT_THAT(accepted = accept(sockets->first_fd(),
+ reinterpret_cast<struct sockaddr*>(addr_buf),
+ &addr_len),
+ SyscallSucceeds());
+ // addr_len should have been updated by accept().
+ EXPECT_LT(addr_len, sizeof(addr_buf));
+ ASSERT_THAT(close(accepted), SyscallSucceeds());
+}
+
+TEST_P(AllSocketPairTest, AcceptVeryLargePositiveAddrLen) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+
+ ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ // With a large (positive) addr_len, accept does not return EINVAL.
+ int accepted = -1;
+ char addr_buf[2000];
+ socklen_t addr_len = sizeof(addr_buf);
+ ASSERT_THAT(accepted = accept(sockets->first_fd(),
+ reinterpret_cast<struct sockaddr*>(addr_buf),
+ &addr_len),
+ SyscallSucceeds());
+ // addr_len should have been updated by accept().
+ EXPECT_LT(addr_len, sizeof(addr_buf));
+ ASSERT_THAT(close(accepted), SyscallSucceeds());
+}
+
+TEST_P(AllSocketPairTest, AcceptWithoutBind) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ ASSERT_THAT(accept(sockets->first_fd(), nullptr, nullptr),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_P(AllSocketPairTest, AcceptWithoutListen) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+ ASSERT_THAT(accept(sockets->first_fd(), nullptr, nullptr),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_P(AllSocketPairTest, GetRemoteAddress) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+
+ ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ socklen_t addr_len = sockets->first_addr_size();
+ struct sockaddr_storage addr = {};
+ ASSERT_THAT(
+ getpeername(sockets->second_fd(), (struct sockaddr*)(&addr), &addr_len),
+ SyscallSucceeds());
+ EXPECT_EQ(addr_len, sockets->first_addr_len());
+ EXPECT_EQ(0, memcmp(&addr, sockets->first_addr(), sockets->first_addr_len()));
+}
+
+TEST_P(AllSocketPairTest, UnboundGetLocalAddress) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+
+ ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ socklen_t addr_len = sockets->first_addr_size();
+ struct sockaddr_storage addr = {};
+ ASSERT_THAT(
+ getsockname(sockets->second_fd(), (struct sockaddr*)(&addr), &addr_len),
+ SyscallSucceeds());
+ EXPECT_EQ(addr_len, 2);
+ EXPECT_EQ(
+ memcmp(&addr, sockets->second_addr(),
+ std::min((size_t)addr_len, (size_t)sockets->second_addr_len())),
+ 0);
+}
+
+TEST_P(AllSocketPairTest, BoundGetLocalAddress) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+
+ ASSERT_THAT(bind(sockets->second_fd(), sockets->second_addr(),
+ sockets->second_addr_size()),
+ SyscallSucceeds());
+
+ ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ socklen_t addr_len = sockets->first_addr_size();
+ struct sockaddr_storage addr = {};
+ ASSERT_THAT(
+ getsockname(sockets->second_fd(), (struct sockaddr*)(&addr), &addr_len),
+ SyscallSucceeds());
+ EXPECT_EQ(addr_len, sockets->second_addr_len());
+ EXPECT_EQ(
+ memcmp(&addr, sockets->second_addr(),
+ std::min((size_t)addr_len, (size_t)sockets->second_addr_len())),
+ 0);
+}
+
+TEST_P(AllSocketPairTest, BoundConnector) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+
+ ASSERT_THAT(bind(sockets->second_fd(), sockets->second_addr(),
+ sockets->second_addr_size()),
+ SyscallSucceeds());
+
+ ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+}
+
+TEST_P(AllSocketPairTest, UnboundSenderAddr) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+
+ ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ int accepted = -1;
+ ASSERT_THAT(accepted = accept(sockets->first_fd(), nullptr, nullptr),
+ SyscallSucceeds());
+ FileDescriptor accepted_fd(accepted);
+
+ int i = 0;
+ ASSERT_THAT(RetryEINTR(send)(sockets->second_fd(), &i, sizeof(i), 0),
+ SyscallSucceedsWithValue(sizeof(i)));
+
+ struct sockaddr_storage addr;
+ socklen_t addr_len = sizeof(addr);
+ ASSERT_THAT(
+ RetryEINTR(recvfrom)(accepted_fd.get(), &i, sizeof(i), 0,
+ reinterpret_cast<sockaddr*>(&addr), &addr_len),
+ SyscallSucceedsWithValue(sizeof(i)));
+ if (!IsRunningOnGvisor()) {
+ // Linux returns a zero length for addresses from recvfrom(2) and
+ // recvmsg(2). This differs from the behavior of getpeername(2) and
+ // getsockname(2). For simplicity, we use the getpeername(2) and
+ // getsockname(2) behavior for recvfrom(2) and recvmsg(2).
+ EXPECT_EQ(addr_len, 0);
+ return;
+ }
+ EXPECT_EQ(addr_len, 2);
+ EXPECT_EQ(
+ memcmp(&addr, sockets->second_addr(),
+ std::min((size_t)addr_len, (size_t)sockets->second_addr_len())),
+ 0);
+}
+
+TEST_P(AllSocketPairTest, BoundSenderAddr) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+
+ ASSERT_THAT(bind(sockets->second_fd(), sockets->second_addr(),
+ sockets->second_addr_size()),
+ SyscallSucceeds());
+
+ ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ int accepted = -1;
+ ASSERT_THAT(accepted = accept(sockets->first_fd(), nullptr, nullptr),
+ SyscallSucceeds());
+ FileDescriptor accepted_fd(accepted);
+
+ int i = 0;
+ ASSERT_THAT(RetryEINTR(send)(sockets->second_fd(), &i, sizeof(i), 0),
+ SyscallSucceedsWithValue(sizeof(i)));
+
+ struct sockaddr_storage addr;
+ socklen_t addr_len = sizeof(addr);
+ ASSERT_THAT(
+ RetryEINTR(recvfrom)(accepted_fd.get(), &i, sizeof(i), 0,
+ reinterpret_cast<sockaddr*>(&addr), &addr_len),
+ SyscallSucceedsWithValue(sizeof(i)));
+ EXPECT_EQ(addr_len, sockets->second_addr_len());
+ EXPECT_EQ(
+ memcmp(&addr, sockets->second_addr(),
+ std::min((size_t)addr_len, (size_t)sockets->second_addr_len())),
+ 0);
+}
+
+TEST_P(AllSocketPairTest, BindAfterConnectSenderAddr) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+
+ ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ ASSERT_THAT(bind(sockets->second_fd(), sockets->second_addr(),
+ sockets->second_addr_size()),
+ SyscallSucceeds());
+
+ int accepted = -1;
+ ASSERT_THAT(accepted = accept(sockets->first_fd(), nullptr, nullptr),
+ SyscallSucceeds());
+ FileDescriptor accepted_fd(accepted);
+
+ int i = 0;
+ ASSERT_THAT(RetryEINTR(send)(sockets->second_fd(), &i, sizeof(i), 0),
+ SyscallSucceedsWithValue(sizeof(i)));
+
+ struct sockaddr_storage addr;
+ socklen_t addr_len = sizeof(addr);
+ ASSERT_THAT(
+ RetryEINTR(recvfrom)(accepted_fd.get(), &i, sizeof(i), 0,
+ reinterpret_cast<sockaddr*>(&addr), &addr_len),
+ SyscallSucceedsWithValue(sizeof(i)));
+ EXPECT_EQ(addr_len, sockets->second_addr_len());
+ EXPECT_EQ(
+ memcmp(&addr, sockets->second_addr(),
+ std::min((size_t)addr_len, (size_t)sockets->second_addr_len())),
+ 0);
+}
+
+TEST_P(AllSocketPairTest, BindAfterAcceptSenderAddr) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+
+ ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ int accepted = -1;
+ ASSERT_THAT(accepted = accept(sockets->first_fd(), nullptr, nullptr),
+ SyscallSucceeds());
+ FileDescriptor accepted_fd(accepted);
+
+ ASSERT_THAT(bind(sockets->second_fd(), sockets->second_addr(),
+ sockets->second_addr_size()),
+ SyscallSucceeds());
+
+ int i = 0;
+ ASSERT_THAT(RetryEINTR(send)(sockets->second_fd(), &i, sizeof(i), 0),
+ SyscallSucceedsWithValue(sizeof(i)));
+
+ struct sockaddr_storage addr;
+ socklen_t addr_len = sizeof(addr);
+ ASSERT_THAT(
+ RetryEINTR(recvfrom)(accepted_fd.get(), &i, sizeof(i), 0,
+ reinterpret_cast<sockaddr*>(&addr), &addr_len),
+ SyscallSucceedsWithValue(sizeof(i)));
+ EXPECT_EQ(addr_len, sockets->second_addr_len());
+ EXPECT_EQ(
+ memcmp(&addr, sockets->second_addr(),
+ std::min((size_t)addr_len, (size_t)sockets->second_addr_len())),
+ 0);
+}
+
+INSTANTIATE_TEST_CASE_P(
+ AllUnixDomainSockets, AllSocketPairTest,
+ ::testing::ValuesIn(VecCat<SocketPairKind>(
+ ApplyVec<SocketPairKind>(
+ FilesystemUnboundUnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_SEQPACKET},
+ List<int>{0, SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC})),
+ ApplyVec<SocketPairKind>(
+ AbstractUnboundUnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_SEQPACKET},
+ List<int>{0, SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC})))));
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/accept_bind_stream.cc b/test/syscalls/linux/accept_bind_stream.cc
new file mode 100644
index 000000000..f7113a6fc
--- /dev/null
+++ b/test/syscalls/linux/accept_bind_stream.cc
@@ -0,0 +1,93 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdio.h>
+#include <sys/un.h>
+#include <algorithm>
+#include <vector>
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST_P(AllSocketPairTest, BoundSenderAddrCoalesced) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds());
+
+ ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ int accepted = -1;
+ ASSERT_THAT(accepted = accept(sockets->first_fd(), nullptr, nullptr),
+ SyscallSucceeds());
+ FileDescriptor closer(accepted);
+
+ int i = 0;
+ ASSERT_THAT(RetryEINTR(send)(sockets->second_fd(), &i, sizeof(i), 0),
+ SyscallSucceedsWithValue(sizeof(i)));
+
+ ASSERT_THAT(bind(sockets->second_fd(), sockets->second_addr(),
+ sockets->second_addr_size()),
+ SyscallSucceeds());
+
+ i = 0;
+ ASSERT_THAT(RetryEINTR(send)(sockets->second_fd(), &i, sizeof(i), 0),
+ SyscallSucceedsWithValue(sizeof(i)));
+
+ int ri[2] = {0, 0};
+ struct sockaddr_storage addr;
+ socklen_t addr_len = sizeof(addr);
+ ASSERT_THAT(
+ RetryEINTR(recvfrom)(accepted, ri, sizeof(ri), 0,
+ reinterpret_cast<sockaddr*>(&addr), &addr_len),
+ SyscallSucceedsWithValue(sizeof(ri)));
+ EXPECT_EQ(addr_len, sockets->second_addr_len());
+
+ EXPECT_EQ(
+ memcmp(&addr, sockets->second_addr(),
+ std::min((size_t)addr_len, (size_t)sockets->second_addr_len())),
+ 0);
+}
+
+INSTANTIATE_TEST_CASE_P(
+ AllUnixDomainSockets, AllSocketPairTest,
+ ::testing::ValuesIn(VecCat<SocketPairKind>(
+ ApplyVec<SocketPairKind>(
+ FilesystemUnboundUnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_STREAM},
+ List<int>{0, SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC})),
+ ApplyVec<SocketPairKind>(
+ AbstractUnboundUnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_STREAM},
+ List<int>{0, SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC})))));
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/access.cc b/test/syscalls/linux/access.cc
new file mode 100644
index 000000000..6ea070a5d
--- /dev/null
+++ b/test/syscalls/linux/access.cc
@@ -0,0 +1,170 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "test/util/capability_util.h"
+#include "test/util/fs_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+using ::testing::Ge;
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+class AccessTest : public ::testing::Test {
+ public:
+ std::string CreateTempFile(int perm) {
+ const std::string path = NewTempAbsPath();
+ const int fd = open(path.c_str(), O_CREAT | O_RDONLY, perm);
+ TEST_PCHECK(fd > 0);
+ TEST_PCHECK(close(fd) == 0);
+ return path;
+ }
+
+ protected:
+ // SetUp creates various configurations of files.
+ void SetUp() override {
+ // Move to the temporary directory. This allows us to reason more easily
+ // about absolute and relative paths.
+ ASSERT_THAT(chdir(GetAbsoluteTestTmpdir().c_str()), SyscallSucceeds());
+
+ // Create an empty file, standard permissions.
+ relfile_ = NewTempRelPath();
+ int fd;
+ ASSERT_THAT(fd = open(relfile_.c_str(), O_CREAT | O_TRUNC, 0644),
+ SyscallSucceedsWithValue(Ge(0)));
+ ASSERT_THAT(close(fd), SyscallSucceeds());
+ absfile_ = GetAbsoluteTestTmpdir() + "/" + relfile_;
+
+ // Create an empty directory, no writable permissions.
+ absdir_ = NewTempAbsPath();
+ reldir_ = JoinPath(Basename(absdir_), "");
+ ASSERT_THAT(mkdir(reldir_.c_str(), 0555), SyscallSucceeds());
+
+ // This file doesn't exist.
+ relnone_ = NewTempRelPath();
+ absnone_ = GetAbsoluteTestTmpdir() + "/" + relnone_;
+ }
+
+ // TearDown unlinks created files.
+ void TearDown() override {
+ ASSERT_THAT(unlink(absfile_.c_str()), SyscallSucceeds());
+ ASSERT_THAT(rmdir(absdir_.c_str()), SyscallSucceeds());
+ }
+
+ std::string relfile_;
+ std::string reldir_;
+
+ std::string absfile_;
+ std::string absdir_;
+
+ std::string relnone_;
+ std::string absnone_;
+};
+
+TEST_F(AccessTest, RelativeFile) {
+ EXPECT_THAT(access(relfile_.c_str(), R_OK), SyscallSucceeds());
+}
+
+TEST_F(AccessTest, RelativeDir) {
+ EXPECT_THAT(access(reldir_.c_str(), R_OK | X_OK), SyscallSucceeds());
+}
+
+TEST_F(AccessTest, AbsFile) {
+ EXPECT_THAT(access(absfile_.c_str(), R_OK), SyscallSucceeds());
+}
+
+TEST_F(AccessTest, AbsDir) {
+ EXPECT_THAT(access(absdir_.c_str(), R_OK | X_OK), SyscallSucceeds());
+}
+
+TEST_F(AccessTest, RelDoesNotExist) {
+ EXPECT_THAT(access(relnone_.c_str(), R_OK), SyscallFailsWithErrno(ENOENT));
+}
+
+TEST_F(AccessTest, AbsDoesNotExist) {
+ EXPECT_THAT(access(absnone_.c_str(), R_OK), SyscallFailsWithErrno(ENOENT));
+}
+
+TEST_F(AccessTest, InvalidMode) {
+ EXPECT_THAT(access(relfile_.c_str(), 0xffffffff),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_F(AccessTest, NoPerms) {
+ // Drop capabilities that allow us to override permissions. We must drop
+ // PERMITTED because access() checks those instead of EFFECTIVE.
+ ASSERT_NO_ERRNO(DropPermittedCapability(CAP_DAC_OVERRIDE));
+ ASSERT_NO_ERRNO(DropPermittedCapability(CAP_DAC_READ_SEARCH));
+
+ EXPECT_THAT(access(absdir_.c_str(), W_OK), SyscallFailsWithErrno(EACCES));
+}
+
+TEST_F(AccessTest, InvalidName) {
+ EXPECT_THAT(access(reinterpret_cast<char*>(0x1234), W_OK),
+ SyscallFailsWithErrno(EFAULT));
+}
+
+TEST_F(AccessTest, UsrReadOnly) {
+ // Drop capabilities that allow us to override permissions. We must drop
+ // PERMITTED because access() checks those instead of EFFECTIVE.
+ ASSERT_NO_ERRNO(DropPermittedCapability(CAP_DAC_OVERRIDE));
+ ASSERT_NO_ERRNO(DropPermittedCapability(CAP_DAC_READ_SEARCH));
+
+ const std::string filename = CreateTempFile(0400);
+ EXPECT_THAT(access(filename.c_str(), R_OK), SyscallSucceeds());
+ EXPECT_THAT(access(filename.c_str(), W_OK), SyscallFailsWithErrno(EACCES));
+ EXPECT_THAT(access(filename.c_str(), X_OK), SyscallFailsWithErrno(EACCES));
+ EXPECT_THAT(unlink(filename.c_str()), SyscallSucceeds());
+}
+
+TEST_F(AccessTest, UsrReadExec) {
+ // Drop capabilities that allow us to override permissions. We must drop
+ // PERMITTED because access() checks those instead of EFFECTIVE.
+ ASSERT_NO_ERRNO(DropPermittedCapability(CAP_DAC_OVERRIDE));
+ ASSERT_NO_ERRNO(DropPermittedCapability(CAP_DAC_READ_SEARCH));
+
+ const std::string filename = CreateTempFile(0500);
+ EXPECT_THAT(access(filename.c_str(), R_OK | X_OK), SyscallSucceeds());
+ EXPECT_THAT(access(filename.c_str(), W_OK), SyscallFailsWithErrno(EACCES));
+ EXPECT_THAT(unlink(filename.c_str()), SyscallSucceeds());
+}
+
+TEST_F(AccessTest, UsrReadWrite) {
+ const std::string filename = CreateTempFile(0600);
+ EXPECT_THAT(access(filename.c_str(), R_OK | W_OK), SyscallSucceeds());
+ EXPECT_THAT(access(filename.c_str(), X_OK), SyscallFailsWithErrno(EACCES));
+ EXPECT_THAT(unlink(filename.c_str()), SyscallSucceeds());
+}
+
+TEST_F(AccessTest, UsrReadWriteExec) {
+ const std::string filename = CreateTempFile(0700);
+ EXPECT_THAT(access(filename.c_str(), R_OK | W_OK | X_OK), SyscallSucceeds());
+ EXPECT_THAT(unlink(filename.c_str()), SyscallSucceeds());
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/affinity.cc b/test/syscalls/linux/affinity.cc
new file mode 100644
index 000000000..8a16343d5
--- /dev/null
+++ b/test/syscalls/linux/affinity.cc
@@ -0,0 +1,241 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sched.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "absl/strings/str_split.h"
+#include "test/util/cleanup.h"
+#include "test/util/fs_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+// These tests are for both the sched_getaffinity(2) and sched_setaffinity(2)
+// syscalls.
+class AffinityTest : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ EXPECT_THAT(
+ // Needs use the raw syscall to get the actual size.
+ cpuset_size_ = syscall(SYS_sched_getaffinity, /*pid=*/0,
+ sizeof(cpu_set_t), &mask_),
+ SyscallSucceeds());
+ // Lots of tests rely on having more than 1 logical processor available.
+ EXPECT_GT(CPU_COUNT(&mask_), 1);
+ }
+
+ static PosixError ClearLowestBit(cpu_set_t* mask, size_t cpus) {
+ const size_t mask_size = CPU_ALLOC_SIZE(cpus);
+ for (size_t n = 0; n < cpus; ++n) {
+ if (CPU_ISSET_S(n, mask_size, mask)) {
+ CPU_CLR_S(n, mask_size, mask);
+ return NoError();
+ }
+ }
+ return PosixError(EINVAL, "No bit to clear, mask is empty");
+ }
+
+ PosixError ClearLowestBit() { return ClearLowestBit(&mask_, CPU_SETSIZE); }
+
+ // Stores the initial cpu mask for this process.
+ cpu_set_t mask_ = {};
+ int cpuset_size_ = 0;
+};
+
+// sched_getaffinity(2) is implemented.
+TEST_F(AffinityTest, SchedGetAffinityImplemented) {
+ EXPECT_THAT(sched_getaffinity(/*pid=*/0, sizeof(cpu_set_t), &mask_),
+ SyscallSucceeds());
+}
+
+// PID is not found.
+TEST_F(AffinityTest, SchedGetAffinityInvalidPID) {
+ // Flaky, but it's tough to avoid a race condition when finding an unused pid
+ EXPECT_THAT(sched_getaffinity(/*pid=*/INT_MAX - 1, sizeof(cpu_set_t), &mask_),
+ SyscallFailsWithErrno(ESRCH));
+}
+
+// PID is not found.
+TEST_F(AffinityTest, SchedSetAffinityInvalidPID) {
+ // Flaky, but it's tough to avoid a race condition when finding an unused pid
+ EXPECT_THAT(sched_setaffinity(/*pid=*/INT_MAX - 1, sizeof(cpu_set_t), &mask_),
+ SyscallFailsWithErrno(ESRCH));
+}
+
+TEST_F(AffinityTest, SchedSetAffinityZeroMask) {
+ CPU_ZERO(&mask_);
+ EXPECT_THAT(sched_setaffinity(/*pid=*/0, sizeof(cpu_set_t), &mask_),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+// N.B. This test case relies on cpuset_size_ larger than the actual number of
+// of all existing CPUs. Check your machine if the test fails.
+TEST_F(AffinityTest, SchedSetAffinityNonexistentCPUDropped) {
+ cpu_set_t mask = mask_;
+ // Add a nonexistent CPU.
+ //
+ // The number needs to be larger than the possible number of CPU available,
+ // but smaller than the number of the CPU that the kernel claims to support --
+ // it's implicitly returned by raw sched_getaffinity syscall.
+ CPU_SET(cpuset_size_ * 8 - 1, &mask);
+ EXPECT_THAT(
+ // Use raw syscall because it will be rejected by the libc wrapper
+ // otherwise.
+ syscall(SYS_sched_setaffinity, /*pid=*/0, sizeof(cpu_set_t), &mask),
+ SyscallSucceeds())
+ << "failed with cpumask : " << CPUSetToString(mask)
+ << ", cpuset_size_ : " << cpuset_size_;
+ cpu_set_t newmask;
+ EXPECT_THAT(sched_getaffinity(/*pid=*/0, sizeof(cpu_set_t), &newmask),
+ SyscallSucceeds());
+ EXPECT_TRUE(CPU_EQUAL(&mask_, &newmask))
+ << "got: " << CPUSetToString(newmask)
+ << " != expected: " << CPUSetToString(mask_);
+}
+
+TEST_F(AffinityTest, SchedSetAffinityOnlyNonexistentCPUFails) {
+ // Make an empty cpu set.
+ CPU_ZERO(&mask_);
+ // Add a nonexistent CPU.
+ //
+ // The number needs to be larger than the possible number of CPU available,
+ // but smaller than the number of the CPU that the kernel claims to support --
+ // it's implicitly returned by raw sched_getaffinity syscall.
+ int cpu = cpuset_size_ * 8 - 1;
+ if (cpu <= NumCPUs()) {
+ LOG(INFO) << "Skipping test: cpu " << cpu << " exists";
+ return;
+ }
+ CPU_SET(cpu, &mask_);
+ EXPECT_THAT(
+ // Use raw syscall because it will be rejected by the libc wrapper
+ // otherwise.
+ syscall(SYS_sched_setaffinity, /*pid=*/0, sizeof(cpu_set_t), &mask_),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_F(AffinityTest, SchedSetAffinityInvalidSize) {
+ EXPECT_GT(cpuset_size_, 0);
+ // Not big enough.
+ EXPECT_THAT(sched_getaffinity(/*pid=*/0, cpuset_size_ - 1, &mask_),
+ SyscallFailsWithErrno(EINVAL));
+ // Not a multiple of word size.
+ EXPECT_THAT(sched_getaffinity(/*pid=*/0, cpuset_size_ + 1, &mask_),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_F(AffinityTest, Sanity) {
+ ASSERT_NO_ERRNO(ClearLowestBit());
+ EXPECT_THAT(sched_setaffinity(/*pid=*/0, sizeof(cpu_set_t), &mask_),
+ SyscallSucceeds());
+ cpu_set_t newmask;
+ EXPECT_THAT(sched_getaffinity(/*pid=*/0, sizeof(cpu_set_t), &newmask),
+ SyscallSucceeds());
+ EXPECT_TRUE(CPU_EQUAL(&mask_, &newmask))
+ << "got: " << CPUSetToString(newmask)
+ << " != expected: " << CPUSetToString(mask_);
+}
+
+TEST_F(AffinityTest, NewThread) {
+ ASSERT_NO_ERRNO(ClearLowestBit());
+ ASSERT_NO_ERRNO(ClearLowestBit());
+ EXPECT_THAT(sched_setaffinity(/*pid=*/0, sizeof(cpu_set_t), &mask_),
+ SyscallSucceeds());
+ ScopedThread([this]() {
+ cpu_set_t child_mask;
+ ASSERT_THAT(sched_getaffinity(/*pid=*/0, sizeof(cpu_set_t), &child_mask),
+ SyscallSucceeds());
+ ASSERT_TRUE(CPU_EQUAL(&child_mask, &mask_))
+ << "child cpu mask: " << CPUSetToString(child_mask)
+ << " != parent cpu mask: " << CPUSetToString(mask_);
+ });
+}
+
+TEST_F(AffinityTest, ConsistentWithProcCpuInfo) {
+ // Count how many cpus are shown in /proc/cpuinfo.
+ std::string cpuinfo = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/cpuinfo"));
+ int count = 0;
+ for (auto const& line : absl::StrSplit(cpuinfo, '\n')) {
+ if (absl::StartsWith(line, "processor")) {
+ count++;
+ }
+ }
+ EXPECT_GE(count, CPU_COUNT(&mask_));
+}
+
+TEST_F(AffinityTest, ConsistentWithProcStat) {
+ // Count how many cpus are shown in /proc/stat.
+ std::string stat = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/stat"));
+ int count = 0;
+ for (auto const& line : absl::StrSplit(stat, '\n')) {
+ if (absl::StartsWith(line, "cpu") && !absl::StartsWith(line, "cpu ")) {
+ count++;
+ }
+ }
+ EXPECT_GE(count, CPU_COUNT(&mask_));
+}
+
+TEST_F(AffinityTest, SmallCpuMask) {
+ const int num_cpus = NumCPUs();
+ const size_t mask_size = CPU_ALLOC_SIZE(num_cpus);
+ cpu_set_t* mask = CPU_ALLOC(num_cpus);
+ ASSERT_NE(mask, nullptr);
+ const auto free_mask = Cleanup([&] { CPU_FREE(mask); });
+
+ CPU_ZERO_S(mask_size, mask);
+ ASSERT_THAT(sched_getaffinity(0, mask_size, mask), SyscallSucceeds());
+}
+
+TEST_F(AffinityTest, LargeCpuMask) {
+ // Allocate mask bigger than cpu_set_t normally allocates.
+ const size_t cpus = CPU_SETSIZE * 8;
+ const size_t mask_size = CPU_ALLOC_SIZE(cpus);
+
+ cpu_set_t* large_mask = CPU_ALLOC(cpus);
+ auto free_mask = Cleanup([large_mask] { CPU_FREE(large_mask); });
+ CPU_ZERO_S(mask_size, large_mask);
+
+ // Check that get affinity with large mask works as expected.
+ ASSERT_THAT(sched_getaffinity(/*pid=*/0, mask_size, large_mask),
+ SyscallSucceeds());
+ EXPECT_TRUE(CPU_EQUAL(&mask_, large_mask))
+ << "got: " << CPUSetToString(*large_mask, cpus)
+ << " != expected: " << CPUSetToString(mask_);
+
+ // Check that set affinity with large mask works as expected.
+ ASSERT_NO_ERRNO(ClearLowestBit(large_mask, cpus));
+ EXPECT_THAT(sched_setaffinity(/*pid=*/0, mask_size, large_mask),
+ SyscallSucceeds());
+
+ cpu_set_t* new_mask = CPU_ALLOC(cpus);
+ auto free_new_mask = Cleanup([new_mask] { CPU_FREE(new_mask); });
+ CPU_ZERO_S(mask_size, new_mask);
+ EXPECT_THAT(sched_getaffinity(/*pid=*/0, mask_size, new_mask),
+ SyscallSucceeds());
+
+ EXPECT_TRUE(CPU_EQUAL_S(mask_size, large_mask, new_mask))
+ << "got: " << CPUSetToString(*new_mask, cpus)
+ << " != expected: " << CPUSetToString(*large_mask, cpus);
+}
+
+} // namespace
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/aio.cc b/test/syscalls/linux/aio.cc
new file mode 100644
index 000000000..cc5392223
--- /dev/null
+++ b/test/syscalls/linux/aio.cc
@@ -0,0 +1,433 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <linux/aio_abi.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/file_base.h"
+#include "test/util/cleanup.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+constexpr char kData[] = "hello world!";
+
+int SubmitCtx(aio_context_t ctx, long nr, struct iocb** iocbpp) {
+ return syscall(__NR_io_submit, ctx, nr, iocbpp);
+}
+
+} // namespace
+
+class AIOTest : public FileTest {
+ public:
+ AIOTest() : ctx_(0) {}
+
+ int SetupContext(unsigned int nr) {
+ return syscall(__NR_io_setup, nr, &ctx_);
+ }
+
+ int Submit(long nr, struct iocb** iocbpp) {
+ return SubmitCtx(ctx_, nr, iocbpp);
+ }
+
+ int GetEvents(long min, long max, struct io_event* events,
+ struct timespec* timeout) {
+ return RetryEINTR(syscall)(__NR_io_getevents, ctx_, min, max, events,
+ timeout);
+ }
+
+ int DestroyContext() { return syscall(__NR_io_destroy, ctx_); }
+
+ void TearDown() override {
+ FileTest::TearDown();
+ if (ctx_ != 0) {
+ ASSERT_THAT(DestroyContext(), SyscallSucceeds());
+ }
+ }
+
+ struct iocb CreateCallback() {
+ struct iocb cb = {};
+ cb.aio_data = 0x123;
+ cb.aio_fildes = test_file_fd_.get();
+ cb.aio_lio_opcode = IOCB_CMD_PWRITE;
+ cb.aio_buf = reinterpret_cast<uint64_t>(kData);
+ cb.aio_offset = 0;
+ cb.aio_nbytes = strlen(kData);
+ return cb;
+ }
+
+ protected:
+ aio_context_t ctx_;
+};
+
+TEST_F(AIOTest, BasicWrite) {
+ // Copied from fs/aio.c.
+ constexpr unsigned AIO_RING_MAGIC = 0xa10a10a1;
+ struct aio_ring {
+ unsigned id;
+ unsigned nr;
+ unsigned head;
+ unsigned tail;
+ unsigned magic;
+ unsigned compat_features;
+ unsigned incompat_features;
+ unsigned header_length;
+ struct io_event io_events[0];
+ };
+
+ // Setup a context that is 128 entries deep.
+ ASSERT_THAT(SetupContext(128), SyscallSucceeds());
+
+ // Check that 'ctx_' points to a valid address. libaio uses it to check if
+ // aio implementation uses aio_ring. gVisor doesn't and returns all zeroes.
+ // Linux implements aio_ring, so skip the zeroes check.
+ //
+ // TODO: Remove when gVisor implements aio_ring.
+ auto ring = reinterpret_cast<struct aio_ring*>(ctx_);
+ auto magic = IsRunningOnGvisor() ? 0 : AIO_RING_MAGIC;
+ EXPECT_EQ(ring->magic, magic);
+
+ struct iocb cb = CreateCallback();
+ struct iocb* cbs[1] = {&cb};
+
+ // Submit the request.
+ ASSERT_THAT(Submit(1, cbs), SyscallSucceedsWithValue(1));
+
+ // Get the reply.
+ struct io_event events[1];
+ ASSERT_THAT(GetEvents(1, 1, events, nullptr), SyscallSucceedsWithValue(1));
+
+ // Verify that it is as expected.
+ EXPECT_EQ(events[0].data, 0x123);
+ EXPECT_EQ(events[0].obj, reinterpret_cast<long>(&cb));
+ EXPECT_EQ(events[0].res, strlen(kData));
+
+ // Verify that the file contains the contents.
+ char verify_buf[32] = {};
+ ASSERT_THAT(read(test_file_fd_.get(), &verify_buf[0], strlen(kData)),
+ SyscallSucceeds());
+ EXPECT_EQ(strcmp(kData, &verify_buf[0]), 0);
+}
+
+TEST_F(AIOTest, BadWrite) {
+ // Create a pipe and immediately close the read end.
+ int pipefd[2];
+ ASSERT_THAT(pipe(pipefd), SyscallSucceeds());
+
+ FileDescriptor rfd(pipefd[0]);
+ FileDescriptor wfd(pipefd[1]);
+
+ rfd.reset(); // Close the read end.
+
+ // Setup a context that is 128 entries deep.
+ ASSERT_THAT(SetupContext(128), SyscallSucceeds());
+
+ struct iocb cb = CreateCallback();
+ // Try to write to the read end.
+ cb.aio_fildes = wfd.get();
+ struct iocb* cbs[1] = {&cb};
+
+ // Submit the request.
+ ASSERT_THAT(Submit(1, cbs), SyscallSucceedsWithValue(1));
+
+ // Get the reply.
+ struct io_event events[1];
+ ASSERT_THAT(GetEvents(1, 1, events, nullptr), SyscallSucceedsWithValue(1));
+
+ // Verify that it fails with the right error code.
+ EXPECT_EQ(events[0].data, 0x123);
+ EXPECT_EQ(events[0].obj, reinterpret_cast<uint64_t>(&cb));
+ EXPECT_LT(events[0].res, 0);
+}
+
+TEST_F(AIOTest, ExitWithPendingIo) {
+ // Setup a context that is 5 entries deep.
+ ASSERT_THAT(SetupContext(5), SyscallSucceeds());
+
+ struct iocb cb = CreateCallback();
+ struct iocb* cbs[] = {&cb};
+
+ // Submit a request but don't complete it to make it pending.
+ EXPECT_THAT(Submit(1, cbs), SyscallSucceeds());
+}
+
+int Submitter(void* arg) {
+ auto test = reinterpret_cast<AIOTest*>(arg);
+
+ struct iocb cb = test->CreateCallback();
+ struct iocb* cbs[1] = {&cb};
+
+ // Submit the request.
+ TEST_CHECK(test->Submit(1, cbs) == 1);
+ return 0;
+}
+
+TEST_F(AIOTest, CloneVm) {
+ // Setup a context that is 128 entries deep.
+ ASSERT_THAT(SetupContext(128), SyscallSucceeds());
+
+ const size_t kStackSize = 5 * kPageSize;
+ std::unique_ptr<char[]> stack(new char[kStackSize]);
+ char* bp = stack.get() + kStackSize;
+ pid_t child;
+ ASSERT_THAT(child = clone(Submitter, bp, CLONE_VM | SIGCHLD,
+ reinterpret_cast<void*>(this)),
+ SyscallSucceeds());
+
+ // Get the reply.
+ struct io_event events[1];
+ ASSERT_THAT(GetEvents(1, 1, events, nullptr), SyscallSucceedsWithValue(1));
+
+ // Verify that it is as expected.
+ EXPECT_EQ(events[0].data, 0x123);
+ EXPECT_EQ(events[0].res, strlen(kData));
+
+ // Verify that the file contains the contents.
+ char verify_buf[32] = {};
+ ASSERT_THAT(read(test_file_fd_.get(), &verify_buf[0], strlen(kData)),
+ SyscallSucceeds());
+ EXPECT_EQ(strcmp(kData, &verify_buf[0]), 0);
+
+ int status;
+ ASSERT_THAT(RetryEINTR(waitpid)(child, &status, 0),
+ SyscallSucceedsWithValue(child));
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << " status " << status;
+}
+
+// Tests that AIO context can be remapped to a different address.
+TEST_F(AIOTest, Mremap) {
+ // Setup a context that is 128 entries deep.
+ ASSERT_THAT(SetupContext(128), SyscallSucceeds());
+
+ struct iocb cb = CreateCallback();
+ struct iocb* cbs[1] = {&cb};
+
+ // Reserve address space for the mremap target so we have something safe to
+ // map over.
+ //
+ // N.B. We reserve 2 pages because we'll attempt to remap to 2 pages below.
+ // That should fail with EFAULT, but will fail with EINVAL if this mmap
+ // returns the page immediately below ctx_, as
+ // [new_address, new_address+2*kPageSize) overlaps [ctx_, ctx_+kPageSize).
+ void* new_address = mmap(nullptr, 2 * kPageSize, PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ ASSERT_THAT(reinterpret_cast<intptr_t>(new_address), SyscallSucceeds());
+ auto mmap_cleanup = Cleanup([new_address] {
+ EXPECT_THAT(munmap(new_address, 2 * kPageSize), SyscallSucceeds());
+ });
+
+ // Test that remapping to a larger address fails.
+ void* res = mremap(reinterpret_cast<void*>(ctx_), kPageSize, 2 * kPageSize,
+ MREMAP_FIXED | MREMAP_MAYMOVE, new_address);
+ ASSERT_THAT(reinterpret_cast<intptr_t>(res), SyscallFailsWithErrno(EFAULT));
+
+ // Remap context 'handle' to a different address.
+ res = mremap(reinterpret_cast<void*>(ctx_), kPageSize, kPageSize,
+ MREMAP_FIXED | MREMAP_MAYMOVE, new_address);
+ ASSERT_THAT(
+ reinterpret_cast<intptr_t>(res),
+ SyscallSucceedsWithValue(reinterpret_cast<intptr_t>(new_address)));
+ mmap_cleanup.Release();
+ aio_context_t old_ctx = ctx_;
+ ctx_ = reinterpret_cast<aio_context_t>(new_address);
+
+ // Check that submitting the request with the old 'ctx_' fails.
+ ASSERT_THAT(SubmitCtx(old_ctx, 1, cbs), SyscallFailsWithErrno(EINVAL));
+
+ // Submit the request with the new 'ctx_'.
+ ASSERT_THAT(Submit(1, cbs), SyscallSucceedsWithValue(1));
+
+ // Remap again.
+ new_address =
+ mmap(nullptr, kPageSize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ ASSERT_THAT(reinterpret_cast<int64_t>(new_address), SyscallSucceeds());
+ auto mmap_cleanup2 = Cleanup([new_address] {
+ EXPECT_THAT(munmap(new_address, kPageSize), SyscallSucceeds());
+ });
+ res = mremap(reinterpret_cast<void*>(ctx_), kPageSize, kPageSize,
+ MREMAP_FIXED | MREMAP_MAYMOVE, new_address);
+ ASSERT_THAT(reinterpret_cast<int64_t>(res),
+ SyscallSucceedsWithValue(reinterpret_cast<int64_t>(new_address)));
+ mmap_cleanup2.Release();
+ ctx_ = reinterpret_cast<aio_context_t>(new_address);
+
+ // Get the reply with yet another 'ctx_' and verify it.
+ struct io_event events[1];
+ ASSERT_THAT(GetEvents(1, 1, events, nullptr), SyscallSucceedsWithValue(1));
+ EXPECT_EQ(events[0].data, 0x123);
+ EXPECT_EQ(events[0].obj, reinterpret_cast<long>(&cb));
+ EXPECT_EQ(events[0].res, strlen(kData));
+
+ // Verify that the file contains the contents.
+ char verify_buf[32] = {};
+ ASSERT_THAT(read(test_file_fd_.get(), &verify_buf[0], strlen(kData)),
+ SyscallSucceeds());
+ EXPECT_EQ(strcmp(kData, &verify_buf[0]), 0);
+}
+
+// Tests that AIO context can be replaced with a different mapping at the same
+// address and continue working. Don't ask why, but Linux allows it.
+TEST_F(AIOTest, MremapOver) {
+ // Setup a context that is 128 entries deep.
+ ASSERT_THAT(SetupContext(128), SyscallSucceeds());
+
+ struct iocb cb = CreateCallback();
+ struct iocb* cbs[1] = {&cb};
+
+ ASSERT_THAT(Submit(1, cbs), SyscallSucceedsWithValue(1));
+
+ // Allocate a new VMA, copy 'ctx_' content over, and remap it on top
+ // of 'ctx_'.
+ void* new_address = mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ ASSERT_THAT(reinterpret_cast<int64_t>(new_address), SyscallSucceeds());
+ auto mmap_cleanup = Cleanup([new_address] {
+ EXPECT_THAT(munmap(new_address, kPageSize), SyscallSucceeds());
+ });
+
+ memcpy(new_address, reinterpret_cast<void*>(ctx_), kPageSize);
+ void* res =
+ mremap(new_address, kPageSize, kPageSize, MREMAP_FIXED | MREMAP_MAYMOVE,
+ reinterpret_cast<void*>(ctx_));
+ ASSERT_THAT(reinterpret_cast<int64_t>(res), SyscallSucceedsWithValue(ctx_));
+ mmap_cleanup.Release();
+
+ // Everything continues to work just fine.
+ struct io_event events[1];
+ ASSERT_THAT(GetEvents(1, 1, events, nullptr), SyscallSucceedsWithValue(1));
+ EXPECT_EQ(events[0].data, 0x123);
+ EXPECT_EQ(events[0].obj, reinterpret_cast<long>(&cb));
+ EXPECT_EQ(events[0].res, strlen(kData));
+
+ // Verify that the file contains the contents.
+ char verify_buf[32] = {};
+ ASSERT_THAT(read(test_file_fd_.get(), &verify_buf[0], strlen(kData)),
+ SyscallSucceeds());
+ EXPECT_EQ(strcmp(kData, &verify_buf[0]), 0);
+}
+
+// Tests that AIO calls fail if context's address is inaccessible.
+TEST_F(AIOTest, Mprotect) {
+ // Setup a context that is 128 entries deep.
+ ASSERT_THAT(SetupContext(128), SyscallSucceeds());
+
+ struct iocb cb = CreateCallback();
+ struct iocb* cbs[1] = {&cb};
+
+ ASSERT_THAT(Submit(1, cbs), SyscallSucceedsWithValue(1));
+
+ // Makes the context 'handle' inaccessible and check that all subsequent
+ // calls fail.
+ ASSERT_THAT(mprotect(reinterpret_cast<void*>(ctx_), kPageSize, PROT_NONE),
+ SyscallSucceeds());
+ struct io_event events[1];
+ EXPECT_THAT(GetEvents(1, 1, events, nullptr), SyscallFailsWithErrno(EINVAL));
+ ASSERT_THAT(Submit(1, cbs), SyscallFailsWithErrno(EINVAL));
+ EXPECT_THAT(DestroyContext(), SyscallFailsWithErrno(EINVAL));
+
+ // Prevent TearDown from attempting to destroy the context and fail.
+ ctx_ = 0;
+}
+
+TEST_F(AIOTest, Timeout) {
+ // Setup a context that is 128 entries deep.
+ ASSERT_THAT(SetupContext(128), SyscallSucceeds());
+
+ struct timespec timeout;
+ timeout.tv_sec = 0;
+ timeout.tv_nsec = 10;
+ struct io_event events[1];
+ ASSERT_THAT(GetEvents(1, 1, events, &timeout), SyscallSucceedsWithValue(0));
+}
+
+class AIOReadWriteParamTest : public AIOTest,
+ public ::testing::WithParamInterface<int> {};
+
+TEST_P(AIOReadWriteParamTest, BadOffset) {
+ // Setup a context that is 128 entries deep.
+ ASSERT_THAT(SetupContext(128), SyscallSucceeds());
+
+ struct iocb cb = CreateCallback();
+ struct iocb* cbs[1] = {&cb};
+
+ // Create a buffer that we can write to.
+ char buf[] = "hello world!";
+ cb.aio_buf = reinterpret_cast<uint64_t>(buf);
+
+ // Set the operation on the callback and give a negative offset.
+ const int opcode = GetParam();
+ cb.aio_lio_opcode = opcode;
+
+ iovec iov = {};
+ if (opcode == IOCB_CMD_PREADV || opcode == IOCB_CMD_PWRITEV) {
+ // Create a valid iovec and set it in the callback.
+ iov.iov_base = reinterpret_cast<void*>(buf);
+ iov.iov_len = 1;
+ cb.aio_buf = reinterpret_cast<uint64_t>(&iov);
+ // aio_nbytes is the number of iovecs.
+ cb.aio_nbytes = 1;
+ }
+
+ // Pass a negative offset.
+ cb.aio_offset = -1;
+
+ // Should get error on submission.
+ ASSERT_THAT(Submit(1, cbs), SyscallFailsWithErrno(EINVAL));
+}
+
+INSTANTIATE_TEST_CASE_P(BadOffset, AIOReadWriteParamTest,
+ ::testing::Values(IOCB_CMD_PREAD, IOCB_CMD_PWRITE,
+ IOCB_CMD_PREADV, IOCB_CMD_PWRITEV));
+
+class AIOVectorizedParamTest : public AIOTest,
+ public ::testing::WithParamInterface<int> {};
+
+TEST_P(AIOVectorizedParamTest, BadIOVecs) {
+ // Setup a context that is 128 entries deep.
+ ASSERT_THAT(SetupContext(128), SyscallSucceeds());
+
+ struct iocb cb = CreateCallback();
+ struct iocb* cbs[1] = {&cb};
+
+ // Modify the callback to use the operation from the param.
+ cb.aio_lio_opcode = GetParam();
+
+ // Create an iovec with address in kernel range, and pass that as the buffer.
+ iovec iov = {};
+ iov.iov_base = reinterpret_cast<void*>(0xFFFFFFFF00000000);
+ iov.iov_len = 1;
+ cb.aio_buf = reinterpret_cast<uint64_t>(&iov);
+ // aio_nbytes is the number of iovecs.
+ cb.aio_nbytes = 1;
+
+ // Should get error on submission.
+ ASSERT_THAT(Submit(1, cbs), SyscallFailsWithErrno(EFAULT));
+}
+
+INSTANTIATE_TEST_CASE_P(BadIOVecs, AIOVectorizedParamTest,
+ ::testing::Values(IOCB_CMD_PREADV, IOCB_CMD_PWRITEV));
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/alarm.cc b/test/syscalls/linux/alarm.cc
new file mode 100644
index 000000000..e0ddbb415
--- /dev/null
+++ b/test/syscalls/linux/alarm.cc
@@ -0,0 +1,193 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <signal.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/logging.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// N.B. Below, main blocks SIGALRM. Test cases must unblock it if they want
+// delivery.
+
+void do_nothing_handler(int sig, siginfo_t* siginfo, void* arg) {}
+
+// No random save as the test relies on alarm timing. Cooperative save tests
+// already cover the save between alarm and read.
+TEST(AlarmTest, Interrupt_NoRandomSave) {
+ int pipe_fds[2];
+ ASSERT_THAT(pipe(pipe_fds), SyscallSucceeds());
+
+ FileDescriptor read_fd(pipe_fds[0]);
+ FileDescriptor write_fd(pipe_fds[1]);
+
+ // Use a signal handler that interrupts but does nothing rather than using the
+ // default terminate action.
+ struct sigaction sa;
+ sa.sa_sigaction = do_nothing_handler;
+ sigfillset(&sa.sa_mask);
+ sa.sa_flags = 0;
+ auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGALRM, sa));
+
+ // Actually allow SIGALRM delivery.
+ auto mask_cleanup =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGALRM));
+
+ // Alarm in 20 second, which should be well after read blocks below.
+ ASSERT_THAT(alarm(20), SyscallSucceeds());
+
+ char buf;
+ ASSERT_THAT(read(read_fd.get(), &buf, 1), SyscallFailsWithErrno(EINTR));
+}
+
+/* Count of the number of SIGALARMS handled. */
+static volatile int alarms_received = 0;
+
+void inc_alarms_handler(int sig, siginfo_t* siginfo, void* arg) {
+ alarms_received++;
+}
+
+// No random save as the test relies on alarm timing. Cooperative save tests
+// already cover the save between alarm and read.
+TEST(AlarmTest, Restart_NoRandomSave) {
+ alarms_received = 0;
+
+ int pipe_fds[2];
+ ASSERT_THAT(pipe(pipe_fds), SyscallSucceeds());
+
+ FileDescriptor read_fd(pipe_fds[0]);
+ // Write end closed by thread below.
+
+ struct sigaction sa;
+ sa.sa_sigaction = inc_alarms_handler;
+ sigfillset(&sa.sa_mask);
+ sa.sa_flags = SA_RESTART;
+ auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGALRM, sa));
+
+ // Spawn a thread to eventually unblock the read below.
+ ScopedThread t([pipe_fds] {
+ absl::SleepFor(absl::Seconds(30));
+ EXPECT_THAT(close(pipe_fds[1]), SyscallSucceeds());
+ });
+
+ // Actually allow SIGALRM delivery.
+ auto mask_cleanup =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGALRM));
+
+ // Alarm in 20 second, which should be well after read blocks below, but
+ // before it returns.
+ ASSERT_THAT(alarm(20), SyscallSucceeds());
+
+ // Read and eventually get an EOF from the writer closing. If SA_RESTART
+ // didn't work, then the alarm would not have fired and we wouldn't increment
+ // our alarms_received count in our signal handler, or we would have not
+ // restarted the syscall gracefully, which we expect below in order to be
+ // able to get the final EOF on the pipe.
+ char buf;
+ ASSERT_THAT(read(read_fd.get(), &buf, 1), SyscallSucceeds());
+ EXPECT_EQ(alarms_received, 1);
+
+ t.Join();
+}
+
+// No random save as the test relies on alarm timing. Cooperative save tests
+// already cover the save between alarm and pause.
+TEST(AlarmTest, SaSiginfo_NoRandomSave) {
+ // Use a signal handler that interrupts but does nothing rather than using the
+ // default terminate action.
+ struct sigaction sa;
+ sa.sa_sigaction = do_nothing_handler;
+ sigfillset(&sa.sa_mask);
+ sa.sa_flags = SA_SIGINFO;
+ auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGALRM, sa));
+
+ // Actually allow SIGALRM delivery.
+ auto mask_cleanup =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGALRM));
+
+ // Alarm in 20 second, which should be well after pause blocks below.
+ ASSERT_THAT(alarm(20), SyscallSucceeds());
+ ASSERT_THAT(pause(), SyscallFailsWithErrno(EINTR));
+}
+
+// No random save as the test relies on alarm timing. Cooperative save tests
+// already cover the save between alarm and pause.
+TEST(AlarmTest, SaInterrupt_NoRandomSave) {
+ // Use a signal handler that interrupts but does nothing rather than using the
+ // default terminate action.
+ struct sigaction sa;
+ sa.sa_sigaction = do_nothing_handler;
+ sigfillset(&sa.sa_mask);
+ sa.sa_flags = SA_INTERRUPT;
+ auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGALRM, sa));
+
+ // Actually allow SIGALRM delivery.
+ auto mask_cleanup =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGALRM));
+
+ // Alarm in 20 second, which should be well after pause blocks below.
+ ASSERT_THAT(alarm(20), SyscallSucceeds());
+ ASSERT_THAT(pause(), SyscallFailsWithErrno(EINTR));
+}
+
+TEST(AlarmTest, UserModeSpinning) {
+ alarms_received = 0;
+
+ struct sigaction sa = {};
+ sa.sa_sigaction = inc_alarms_handler;
+ sigfillset(&sa.sa_mask);
+ sa.sa_flags = SA_SIGINFO;
+ auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGALRM, sa));
+
+ // Actually allow SIGALRM delivery.
+ auto mask_cleanup =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGALRM));
+
+ // Alarm in 20 second, which should be well into the loop below.
+ ASSERT_THAT(alarm(20), SyscallSucceeds());
+ // Make sure that the signal gets delivered even if we are spinning in user
+ // mode when it arrives.
+ while (!alarms_received) {
+ }
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
+
+int main(int argc, char** argv) {
+ // These tests depend on delivering SIGALRM to the main thread. Block SIGALRM
+ // so that any other threads created by TestInit will also have SIGALRM
+ // blocked.
+ sigset_t set;
+ sigemptyset(&set);
+ sigaddset(&set, SIGALRM);
+ TEST_PCHECK(sigprocmask(SIG_BLOCK, &set, nullptr) == 0);
+
+ gvisor::testing::TestInit(&argc, &argv);
+
+ return RUN_ALL_TESTS();
+}
diff --git a/test/syscalls/linux/arch_prctl.cc b/test/syscalls/linux/arch_prctl.cc
new file mode 100644
index 000000000..5687ceb86
--- /dev/null
+++ b/test/syscalls/linux/arch_prctl.cc
@@ -0,0 +1,48 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <asm/prctl.h>
+#include <sys/prctl.h>
+
+#include "gtest/gtest.h"
+#include "test/util/test_util.h"
+
+// glibc does not provide a prototype for arch_prctl() so declare it here.
+extern "C" int arch_prctl(int code, uintptr_t addr);
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(ArchPrctlTest, GetSetFS) {
+ uintptr_t orig;
+ const uintptr_t kNonCanonicalFsbase = 0x4141414142424242;
+
+ // Get the original FS.base and then set it to the same value (this is
+ // intentional because FS.base is the TLS pointer so we cannot change it
+ // arbitrarily).
+ ASSERT_THAT(arch_prctl(ARCH_GET_FS, reinterpret_cast<uintptr_t>(&orig)),
+ SyscallSucceeds());
+ ASSERT_THAT(arch_prctl(ARCH_SET_FS, orig), SyscallSucceeds());
+
+ // Trying to set FS.base to a non-canonical value should return an error.
+ ASSERT_THAT(arch_prctl(ARCH_SET_FS, kNonCanonicalFsbase),
+ SyscallFailsWithErrno(EPERM));
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/bad.cc b/test/syscalls/linux/bad.cc
new file mode 100644
index 000000000..a2634a8bf
--- /dev/null
+++ b/test/syscalls/linux/bad.cc
@@ -0,0 +1,39 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(BadSyscallTest, NotImplemented) {
+ // get_kernel_syms is not supported in Linux > 2.6, and not implemented in
+ // gVisor.
+ EXPECT_THAT(syscall(SYS_get_kernel_syms), SyscallFailsWithErrno(ENOSYS));
+}
+
+TEST(BadSyscallTest, NegativeOne) {
+ EXPECT_THAT(syscall(-1), SyscallFailsWithErrno(ENOSYS));
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/base_poll_test.cc b/test/syscalls/linux/base_poll_test.cc
new file mode 100644
index 000000000..bba0108ea
--- /dev/null
+++ b/test/syscalls/linux/base_poll_test.cc
@@ -0,0 +1,65 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/base_poll_test.h"
+
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <syscall.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "absl/memory/memory.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+static volatile int timer_fired = 0;
+static void SigAlarmHandler(int, siginfo_t*, void*) { timer_fired = 1; }
+
+BasePollTest::BasePollTest() {
+ // Register our SIGALRM handler, but save the original so we can restore in
+ // the destructor.
+ struct sigaction sa = {};
+ sa.sa_sigaction = SigAlarmHandler;
+ sigfillset(&sa.sa_mask);
+ TEST_PCHECK(sigaction(SIGALRM, &sa, &original_alarm_sa_) == 0);
+}
+
+BasePollTest::~BasePollTest() {
+ ClearTimer();
+ TEST_PCHECK(sigaction(SIGALRM, &original_alarm_sa_, nullptr) == 0);
+}
+
+void BasePollTest::SetTimer(absl::Duration duration) {
+ pid_t tgid = getpid();
+ pid_t tid = gettid();
+ ClearTimer();
+
+ // Create a new timer thread.
+ timer_ = absl::make_unique<TimerThread>(absl::Now() + duration, tgid, tid);
+}
+
+bool BasePollTest::TimerFired() const { return timer_fired; }
+
+void BasePollTest::ClearTimer() {
+ timer_.reset();
+ timer_fired = 0;
+}
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/base_poll_test.h b/test/syscalls/linux/base_poll_test.h
new file mode 100644
index 000000000..9b9b81933
--- /dev/null
+++ b/test/syscalls/linux/base_poll_test.h
@@ -0,0 +1,101 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_BASE_POLL_TEST_H_
+#define GVISOR_TEST_SYSCALLS_BASE_POLL_TEST_H_
+
+#include <signal.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <syscall.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <memory>
+
+#include "gtest/gtest.h"
+#include "absl/synchronization/mutex.h"
+#include "absl/time/time.h"
+#include "test/util/logging.h"
+#include "test/util/signal_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// TimerThread is a cancelable timer.
+class TimerThread {
+ public:
+ TimerThread(absl::Time deadline, pid_t tgid, pid_t tid)
+ : thread_([=] {
+ mu_.Lock();
+ mu_.AwaitWithDeadline(absl::Condition(&cancel_), deadline);
+ if (!cancel_) {
+ TEST_PCHECK(tgkill(tgid, tid, SIGALRM) == 0);
+ }
+ mu_.Unlock();
+ }) {}
+
+ ~TimerThread() { Cancel(); }
+
+ void Cancel() {
+ absl::MutexLock ml(&mu_);
+ cancel_ = true;
+ }
+
+ private:
+ mutable absl::Mutex mu_;
+ bool cancel_ GUARDED_BY(mu_) = false;
+
+ // Must be last to ensure that the destructor for the thread is run before
+ // any other member of the object is destroyed.
+ ScopedThread thread_;
+};
+
+// Base test fixture for poll, select, ppoll, and pselect tests.
+//
+// This fixture makes use of SIGALRM. The handler is saved in SetUp() and
+// restored in TearDown().
+class BasePollTest : public ::testing::Test {
+ protected:
+ BasePollTest();
+ ~BasePollTest() override;
+
+ // Sets a timer that will send a signal to the calling thread after
+ // `duration`.
+ void SetTimer(absl::Duration duration);
+
+ // Returns true if the timer has fired.
+ bool TimerFired() const;
+
+ // Stops the pending timer (if any) and clear the "fired" state.
+ void ClearTimer();
+
+ private:
+ // Thread that implements the timer. If the timer is stopped, timer_ is null.
+ //
+ // We have to use a thread for this purpose because tests using this fixture
+ // expect to be interrupted by the timer signal, but itimers/alarm(2) send
+ // thread-group-directed signals, which may be handled by any thread in the
+ // test process.
+ std::unique_ptr<TimerThread> timer_;
+
+ // The original SIGALRM handler, to restore in destructor.
+ struct sigaction original_alarm_sa_;
+};
+
+} // namespace testing
+} // namespace gvisor
+
+#endif // GVISOR_TEST_SYSCALLS_BASE_POLL_TEST_H_
diff --git a/test/syscalls/linux/bind.cc b/test/syscalls/linux/bind.cc
new file mode 100644
index 000000000..354e8e53c
--- /dev/null
+++ b/test/syscalls/linux/bind.cc
@@ -0,0 +1,146 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdio.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST_P(AllSocketPairTest, Bind) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+}
+
+TEST_P(AllSocketPairTest, BindTooLong) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ // first_addr is a sockaddr_storage being used as a sockaddr_un. Use the full
+ // length which is longer than expected for a Unix socket.
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sizeof(sockaddr_storage)),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_P(AllSocketPairTest, DoubleBindSocket) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ EXPECT_THAT(
+ bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ // Linux 4.09 returns EINVAL here, but some time before 4.19 it switched
+ // to EADDRINUSE.
+ AnyOf(SyscallFailsWithErrno(EADDRINUSE), SyscallFailsWithErrno(EINVAL)));
+}
+
+TEST_P(AllSocketPairTest, GetLocalAddr) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+ socklen_t addressLength = sockets->first_addr_size();
+ struct sockaddr_storage address = {};
+ ASSERT_THAT(getsockname(sockets->first_fd(), (struct sockaddr*)(&address),
+ &addressLength),
+ SyscallSucceeds());
+ EXPECT_EQ(
+ 0, memcmp(&address, sockets->first_addr(), sockets->first_addr_size()));
+}
+
+TEST_P(AllSocketPairTest, GetLocalAddrWithoutBind) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ socklen_t addressLength = sockets->first_addr_size();
+ struct sockaddr_storage received_address = {};
+ ASSERT_THAT(
+ getsockname(sockets->first_fd(), (struct sockaddr*)(&received_address),
+ &addressLength),
+ SyscallSucceeds());
+ struct sockaddr_storage want_address = {};
+ want_address.ss_family = sockets->first_addr()->sa_family;
+ EXPECT_EQ(0, memcmp(&received_address, &want_address, addressLength));
+}
+
+TEST_P(AllSocketPairTest, GetRemoteAddressWithoutConnect) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ socklen_t addressLength = sockets->first_addr_size();
+ struct sockaddr_storage address = {};
+ ASSERT_THAT(getpeername(sockets->second_fd(), (struct sockaddr*)(&address),
+ &addressLength),
+ SyscallFailsWithErrno(ENOTCONN));
+}
+
+TEST_P(AllSocketPairTest, DoubleBindAddress) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ EXPECT_THAT(bind(sockets->second_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallFailsWithErrno(EADDRINUSE));
+}
+
+TEST_P(AllSocketPairTest, Unbind) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+ ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds());
+
+ // Filesystem Unix sockets do not release their address when closed.
+ if (sockets->first_addr()->sa_data[0] != 0) {
+ ASSERT_THAT(bind(sockets->second_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallFailsWithErrno(EADDRINUSE));
+ return;
+ }
+
+ ASSERT_THAT(bind(sockets->second_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+ ASSERT_THAT(close(sockets->release_second_fd()), SyscallSucceeds());
+}
+
+INSTANTIATE_TEST_CASE_P(
+ AllUnixDomainSockets, AllSocketPairTest,
+ ::testing::ValuesIn(VecCat<SocketPairKind>(
+ ApplyVec<SocketPairKind>(
+ FilesystemUnboundUnixDomainSocketPair,
+ AllBitwiseCombinations(
+ List<int>{SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET},
+ List<int>{0, SOCK_NONBLOCK}, List<int>{0, SOCK_CLOEXEC})),
+ ApplyVec<SocketPairKind>(
+ AbstractUnboundUnixDomainSocketPair,
+ AllBitwiseCombinations(
+ List<int>{SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET},
+ List<int>{0, SOCK_NONBLOCK}, List<int>{0, SOCK_CLOEXEC})))));
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/brk.cc b/test/syscalls/linux/brk.cc
new file mode 100644
index 000000000..33d353959
--- /dev/null
+++ b/test/syscalls/linux/brk.cc
@@ -0,0 +1,31 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdint.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+TEST(BrkTest, BrkSyscallReturnsOldBrkOnFailure) {
+ auto old_brk = sbrk(0);
+ EXPECT_THAT(syscall(SYS_brk, reinterpret_cast<void*>(-1)),
+ SyscallSucceedsWithValue(reinterpret_cast<uintptr_t>(old_brk)));
+}
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/chdir.cc b/test/syscalls/linux/chdir.cc
new file mode 100644
index 000000000..4905ffb23
--- /dev/null
+++ b/test/syscalls/linux/chdir.cc
@@ -0,0 +1,69 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <linux/limits.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include "gtest/gtest.h"
+#include "test/util/capability_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(ChdirTest, Success) {
+ auto old_dir = GetAbsoluteTestTmpdir();
+ auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ EXPECT_THAT(chdir(temp_dir.path().c_str()), SyscallSucceeds());
+ // Temp path destructor deletes the newly created tmp dir and Sentry rejects
+ // saving when its current dir is still pointing to the path. Switch to a
+ // permanent path here.
+ EXPECT_THAT(chdir(old_dir.c_str()), SyscallSucceeds());
+}
+
+TEST(ChdirTest, PermissionDenied) {
+ // Drop capabilities that allow us to override directory permissions.
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+ auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(
+ TempPath::CreateDirWith(GetAbsoluteTestTmpdir(), 0666 /* mode */));
+ EXPECT_THAT(chdir(temp_dir.path().c_str()), SyscallFailsWithErrno(EACCES));
+}
+
+TEST(ChdirTest, NotDir) {
+ auto temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ EXPECT_THAT(chdir(temp_file.path().c_str()), SyscallFailsWithErrno(ENOTDIR));
+}
+
+TEST(ChdirTest, NameTooLong) {
+ std::string name(NAME_MAX + 1, 'a');
+ ASSERT_THAT(chdir(name.c_str()), SyscallFailsWithErrno(ENAMETOOLONG));
+}
+
+TEST(ChdirTest, NotExist) {
+ EXPECT_THAT(chdir("/foo/bar"), SyscallFailsWithErrno(ENOENT));
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/chmod.cc b/test/syscalls/linux/chmod.cc
new file mode 100644
index 000000000..b7fc17946
--- /dev/null
+++ b/test/syscalls/linux/chmod.cc
@@ -0,0 +1,262 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <sys/eventfd.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <string>
+
+#include "gtest/gtest.h"
+#include "test/util/capability_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(ChmodTest, ChmodFileSucceeds) {
+ // Drop capabilities that allow us to override file permissions.
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+
+ auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+ ASSERT_THAT(chmod(file.path().c_str(), 0466), SyscallSucceeds());
+ EXPECT_THAT(open(file.path().c_str(), O_RDWR), SyscallFailsWithErrno(EACCES));
+}
+
+TEST(ChmodTest, ChmodDirSucceeds) {
+ // Drop capabilities that allow us to override file and directory permissions.
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+ auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const std::string fileInDir = NewTempAbsPathInDir(dir.path());
+
+ ASSERT_THAT(chmod(dir.path().c_str(), 0466), SyscallSucceeds());
+ EXPECT_THAT(open(fileInDir.c_str(), O_RDONLY), SyscallFailsWithErrno(EACCES));
+}
+
+TEST(ChmodTest, FchmodFileSucceeds_NoRandomSave) {
+ // Drop capabilities that allow us to file directory permissions.
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+
+ auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileMode(0666));
+ int fd;
+ ASSERT_THAT(fd = open(file.path().c_str(), O_RDWR), SyscallSucceeds());
+
+ {
+ const DisableSave ds; // File permissions are reduced.
+ ASSERT_THAT(fchmod(fd, 0444), SyscallSucceeds());
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+ }
+
+ EXPECT_THAT(open(file.path().c_str(), O_RDWR), SyscallFailsWithErrno(EACCES));
+}
+
+TEST(ChmodTest, FchmodDirSucceeds_NoRandomSave) {
+ // Drop capabilities that allow us to override file and directory permissions.
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+ auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ int fd;
+ ASSERT_THAT(fd = open(dir.path().c_str(), O_RDONLY | O_DIRECTORY),
+ SyscallSucceeds());
+
+ {
+ const DisableSave ds; // File permissions are reduced.
+ ASSERT_THAT(fchmod(fd, 0), SyscallSucceeds());
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+ }
+
+ EXPECT_THAT(open(dir.path().c_str(), O_RDONLY),
+ SyscallFailsWithErrno(EACCES));
+}
+
+TEST(ChmodTest, FchmodBadF) {
+ ASSERT_THAT(fchmod(-1, 0444), SyscallFailsWithErrno(EBADF));
+}
+
+TEST(ChmodTest, FchmodatBadF) {
+ ASSERT_THAT(fchmodat(-1, "foo", 0444, 0), SyscallFailsWithErrno(EBADF));
+}
+
+TEST(ChmodTest, FchmodatNotDir) {
+ ASSERT_THAT(fchmodat(-1, "", 0444, 0), SyscallFailsWithErrno(ENOENT));
+}
+
+TEST(ChmodTest, FchmodatFileAbsolutePath) {
+ // Drop capabilities that allow us to override file permissions.
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+
+ auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+ ASSERT_THAT(fchmodat(-1, file.path().c_str(), 0444, 0), SyscallSucceeds());
+ EXPECT_THAT(open(file.path().c_str(), O_RDWR), SyscallFailsWithErrno(EACCES));
+}
+
+TEST(ChmodTest, FchmodatDirAbsolutePath) {
+ // Drop capabilities that allow us to override file and directory permissions.
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+ auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+ int fd;
+ ASSERT_THAT(fd = open(dir.path().c_str(), O_RDONLY | O_DIRECTORY),
+ SyscallSucceeds());
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+
+ ASSERT_THAT(fchmodat(-1, dir.path().c_str(), 0, 0), SyscallSucceeds());
+ EXPECT_THAT(open(dir.path().c_str(), O_RDONLY),
+ SyscallFailsWithErrno(EACCES));
+}
+
+TEST(ChmodTest, FchmodatFile) {
+ // Drop capabilities that allow us to override file permissions.
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+
+ auto temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+ int parent_fd;
+ ASSERT_THAT(
+ parent_fd = open(GetAbsoluteTestTmpdir().c_str(), O_RDONLY | O_DIRECTORY),
+ SyscallSucceeds());
+
+ ASSERT_THAT(
+ fchmodat(parent_fd, std::string(Basename(temp_file.path())).c_str(), 0444, 0),
+ SyscallSucceeds());
+ EXPECT_THAT(close(parent_fd), SyscallSucceeds());
+
+ EXPECT_THAT(open(temp_file.path().c_str(), O_RDWR),
+ SyscallFailsWithErrno(EACCES));
+}
+
+TEST(ChmodTest, FchmodatDir) {
+ // Drop capabilities that allow us to override file and directory permissions.
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+ auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+ int parent_fd;
+ ASSERT_THAT(
+ parent_fd = open(GetAbsoluteTestTmpdir().c_str(), O_RDONLY | O_DIRECTORY),
+ SyscallSucceeds());
+
+ int fd;
+ ASSERT_THAT(fd = open(dir.path().c_str(), O_RDONLY | O_DIRECTORY),
+ SyscallSucceeds());
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+
+ ASSERT_THAT(fchmodat(parent_fd, std::string(Basename(dir.path())).c_str(), 0, 0),
+ SyscallSucceeds());
+ EXPECT_THAT(close(parent_fd), SyscallSucceeds());
+
+ EXPECT_THAT(open(dir.path().c_str(), O_RDONLY | O_DIRECTORY),
+ SyscallFailsWithErrno(EACCES));
+}
+
+TEST(ChmodTest, ChmodDowngradeWritability_NoRandomSave) {
+ auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileMode(0666));
+
+ int fd;
+ ASSERT_THAT(fd = open(file.path().c_str(), O_RDWR), SyscallSucceeds());
+
+ const DisableSave ds; // Permissions are dropped.
+ ASSERT_THAT(chmod(file.path().c_str(), 0444), SyscallSucceeds());
+ EXPECT_THAT(write(fd, "hello", 5), SyscallSucceedsWithValue(5));
+
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+}
+
+TEST(ChmodTest, ChmodFileToNoPermissionsSucceeds) {
+ // Drop capabilities that allow us to override file permissions.
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+ auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileMode(0666));
+
+ ASSERT_THAT(chmod(file.path().c_str(), 0), SyscallSucceeds());
+
+ EXPECT_THAT(open(file.path().c_str(), O_RDONLY),
+ SyscallFailsWithErrno(EACCES));
+}
+
+TEST(ChmodTest, FchmodDowngradeWritability_NoRandomSave) {
+ auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+ int fd;
+ ASSERT_THAT(fd = open(file.path().c_str(), O_RDWR | O_CREAT, 0666),
+ SyscallSucceeds());
+
+ const DisableSave ds; // Permissions are dropped.
+ ASSERT_THAT(fchmod(fd, 0444), SyscallSucceeds());
+ EXPECT_THAT(write(fd, "hello", 5), SyscallSucceedsWithValue(5));
+
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+}
+
+TEST(ChmodTest, FchmodFileToNoPermissionsSucceeds_NoRandomSave) {
+ // Drop capabilities that allow us to override file permissions.
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+ auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileMode(0666));
+
+ int fd;
+ ASSERT_THAT(fd = open(file.path().c_str(), O_RDWR), SyscallSucceeds());
+
+ {
+ const DisableSave ds; // Permissions are dropped.
+ ASSERT_THAT(fchmod(fd, 0), SyscallSucceeds());
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+ }
+
+ EXPECT_THAT(open(file.path().c_str(), O_RDONLY),
+ SyscallFailsWithErrno(EACCES));
+}
+
+// Verify that we can get a RW FD after chmod, even if a RO fd is left open.
+TEST(ChmodTest, ChmodWritableWithOpenFD) {
+ // FIXME: broken on hostfs.
+ if (IsRunningOnGvisor()) {
+ return;
+ }
+
+ TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileMode(0444));
+
+ FileDescriptor fd1 = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY));
+
+ ASSERT_THAT(fchmod(fd1.get(), 0644), SyscallSucceeds());
+
+ // This FD is writable, even though fd1 has a read-only reference to the file.
+ FileDescriptor fd2 = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR));
+
+ // fd1 is not writable, but fd2 is.
+ char c = 'a';
+ EXPECT_THAT(WriteFd(fd1.get(), &c, 1), SyscallFailsWithErrno(EBADF));
+ EXPECT_THAT(WriteFd(fd2.get(), &c, 1), SyscallSucceedsWithValue(1));
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/chown.cc b/test/syscalls/linux/chown.cc
new file mode 100644
index 000000000..aa1df05b1
--- /dev/null
+++ b/test/syscalls/linux/chown.cc
@@ -0,0 +1,200 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <grp.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/synchronization/notification.h"
+#include "test/util/capability_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+DEFINE_int32(scratch_uid1, 65534, "first scratch UID");
+DEFINE_int32(scratch_uid2, 65533, "second scratch UID");
+DEFINE_int32(scratch_gid, 65534, "first scratch GID");
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(ChownTest, FchownBadF) {
+ ASSERT_THAT(fchown(-1, 0, 0), SyscallFailsWithErrno(EBADF));
+}
+
+TEST(ChownTest, FchownatBadF) {
+ ASSERT_THAT(fchownat(-1, "fff", 0, 0, 0), SyscallFailsWithErrno(EBADF));
+}
+
+TEST(ChownTest, FchownatEmptyPath) {
+ const auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const auto fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_DIRECTORY | O_RDONLY));
+ ASSERT_THAT(fchownat(fd.get(), "", 0, 0, 0), SyscallFailsWithErrno(ENOENT));
+}
+
+using Chown =
+ std::function<PosixError(const std::string&, uid_t owner, gid_t group)>;
+
+class ChownParamTest : public ::testing::TestWithParam<Chown> {};
+
+TEST_P(ChownParamTest, ChownFileSucceeds) {
+ if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_CHOWN))) {
+ ASSERT_NO_ERRNO(SetCapability(CAP_CHOWN, false));
+ }
+
+ const auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+ // At least *try* setting to a group other than the EGID.
+ gid_t gid;
+ EXPECT_THAT(gid = getegid(), SyscallSucceeds());
+ int num_groups;
+ EXPECT_THAT(num_groups = getgroups(0, nullptr), SyscallSucceeds());
+ if (num_groups > 0) {
+ std::vector<gid_t> list(num_groups);
+ EXPECT_THAT(getgroups(list.size(), list.data()), SyscallSucceeds());
+ gid = list[0];
+ }
+
+ EXPECT_NO_ERRNO(GetParam()(file.path(), geteuid(), gid));
+
+ struct stat s = {};
+ ASSERT_THAT(stat(file.path().c_str(), &s), SyscallSucceeds());
+ EXPECT_EQ(s.st_uid, geteuid());
+ EXPECT_EQ(s.st_gid, gid);
+}
+
+TEST_P(ChownParamTest, ChownFilePermissionDenied) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SETUID)));
+
+ const auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileMode(0777));
+
+ // Drop privileges and change IDs only in child thread, or else this parent
+ // thread won't be able to open some log files after the test ends.
+ ScopedThread([&] {
+ // Drop privileges.
+ if (HaveCapability(CAP_CHOWN).ValueOrDie()) {
+ EXPECT_NO_ERRNO(SetCapability(CAP_CHOWN, false));
+ }
+
+ // Change EUID and EGID.
+ //
+ // See note about POSIX below.
+ EXPECT_THAT(syscall(SYS_setresgid, -1, FLAGS_scratch_gid, -1),
+ SyscallSucceeds());
+ EXPECT_THAT(syscall(SYS_setresuid, -1, FLAGS_scratch_uid1, -1),
+ SyscallSucceeds());
+
+ EXPECT_THAT(GetParam()(file.path(), geteuid(), getegid()),
+ PosixErrorIs(EPERM, ::testing::ContainsRegex("chown")));
+ });
+}
+
+TEST_P(ChownParamTest, ChownFileSucceedsAsRoot) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability((CAP_CHOWN))));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability((CAP_SETUID))));
+
+ const std::string filename = NewTempAbsPath();
+
+ absl::Notification fileCreated, fileChowned;
+ // Change UID only in child thread, or else this parent thread won't be able
+ // to open some log files after the test ends.
+ ScopedThread t([&] {
+ // POSIX requires that all threads in a process share the same UIDs, so
+ // the NPTL setresuid wrappers use signals to make all threads execute the
+ // setresuid syscall. However, we want this thread to have its own set of
+ // credentials different from the parent process, so we use the raw
+ // syscall.
+ EXPECT_THAT(syscall(SYS_setresuid, -1, FLAGS_scratch_uid2, -1),
+ SyscallSucceeds());
+
+ // Create file and immediately close it.
+ FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(filename, O_CREAT | O_RDWR, 0644));
+ fd.reset(); // Close the fd.
+
+ fileCreated.Notify();
+ fileChowned.WaitForNotification();
+
+ EXPECT_THAT(open(filename.c_str(), O_RDWR), SyscallFailsWithErrno(EACCES));
+ FileDescriptor fd2 = ASSERT_NO_ERRNO_AND_VALUE(Open(filename, O_RDONLY));
+ });
+
+ fileCreated.WaitForNotification();
+
+ // Set file's owners to someone different.
+ EXPECT_NO_ERRNO(GetParam()(filename, FLAGS_scratch_uid1, FLAGS_scratch_gid));
+
+ struct stat s;
+ EXPECT_THAT(stat(filename.c_str(), &s), SyscallSucceeds());
+ EXPECT_EQ(s.st_uid, FLAGS_scratch_uid1);
+ EXPECT_EQ(s.st_gid, FLAGS_scratch_gid);
+
+ fileChowned.Notify();
+}
+
+PosixError errorFromReturn(const std::string& name, int ret) {
+ if (ret == -1) {
+ return PosixError(errno, absl::StrCat(name, " failed"));
+ }
+ return NoError();
+}
+
+INSTANTIATE_TEST_CASE_P(
+ ChownKinds, ChownParamTest,
+ ::testing::Values(
+ [](const std::string& path, uid_t owner, gid_t group) -> PosixError {
+ int rc = chown(path.c_str(), owner, group);
+ MaybeSave();
+ return errorFromReturn("chown", rc);
+ },
+ [](const std::string& path, uid_t owner, gid_t group) -> PosixError {
+ int rc = lchown(path.c_str(), owner, group);
+ MaybeSave();
+ return errorFromReturn("lchown", rc);
+ },
+ [](const std::string& path, uid_t owner, gid_t group) -> PosixError {
+ ASSIGN_OR_RETURN_ERRNO(auto fd, Open(path, O_RDWR));
+ int rc = fchown(fd.get(), owner, group);
+ MaybeSave();
+ return errorFromReturn("fchown", rc);
+ },
+ [](const std::string& path, uid_t owner, gid_t group) -> PosixError {
+ ASSIGN_OR_RETURN_ERRNO(auto fd, Open(path, O_RDWR));
+ int rc = fchownat(fd.get(), "", owner, group, AT_EMPTY_PATH);
+ MaybeSave();
+ return errorFromReturn("fchownat-fd", rc);
+ },
+ [](const std::string& path, uid_t owner, gid_t group) -> PosixError {
+ ASSIGN_OR_RETURN_ERRNO(
+ auto dirfd, Open(std::string(Dirname(path)), O_DIRECTORY | O_RDONLY));
+ int rc = fchownat(dirfd.get(), std::string(Basename(path)).c_str(), owner,
+ group, 0);
+ MaybeSave();
+ return errorFromReturn("fchownat-dirfd", rc);
+ }));
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/chroot.cc b/test/syscalls/linux/chroot.cc
new file mode 100644
index 000000000..f921f9025
--- /dev/null
+++ b/test/syscalls/linux/chroot.cc
@@ -0,0 +1,364 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <syscall.h>
+#include <unistd.h>
+#include <string>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_split.h"
+#include "absl/strings/string_view.h"
+#include "test/util/capability_util.h"
+#include "test/util/cleanup.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/mount_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+using ::testing::HasSubstr;
+using ::testing::Not;
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(ChrootTest, Success) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_CHROOT)));
+
+ auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ EXPECT_THAT(chroot(temp_dir.path().c_str()), SyscallSucceeds());
+}
+
+TEST(ChrootTest, PermissionDenied) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_CHROOT)));
+
+ // CAP_DAC_READ_SEARCH and CAP_DAC_OVERRIDE may override Execute permission on
+ // directories.
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+
+ auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(
+ TempPath::CreateDirWith(GetAbsoluteTestTmpdir(), 0666 /* mode */));
+ EXPECT_THAT(chroot(temp_dir.path().c_str()), SyscallFailsWithErrno(EACCES));
+}
+
+TEST(ChrootTest, NotDir) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_CHROOT)));
+
+ auto temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ EXPECT_THAT(chroot(temp_file.path().c_str()), SyscallFailsWithErrno(ENOTDIR));
+}
+
+TEST(ChrootTest, NotExist) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_CHROOT)));
+
+ EXPECT_THAT(chroot("/foo/bar"), SyscallFailsWithErrno(ENOENT));
+}
+
+TEST(ChrootTest, WithoutCapability) {
+ // Unset CAP_SYS_CHROOT.
+ ASSERT_NO_ERRNO(SetCapability(CAP_SYS_CHROOT, false));
+
+ auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ EXPECT_THAT(chroot(temp_dir.path().c_str()), SyscallFailsWithErrno(EPERM));
+}
+
+TEST(ChrootTest, CreatesNewRoot) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_CHROOT)));
+
+ // Grab the initial cwd.
+ char initial_cwd[1024];
+ ASSERT_THAT(syscall(__NR_getcwd, initial_cwd, sizeof(initial_cwd)),
+ SyscallSucceeds());
+
+ auto new_root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ auto file_in_new_root =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(new_root.path()));
+
+ // chroot into new_root.
+ ASSERT_THAT(chroot(new_root.path().c_str()), SyscallSucceeds());
+
+ // getcwd should return "(unreachable)" followed by the initial_cwd.
+ char cwd[1024];
+ ASSERT_THAT(syscall(__NR_getcwd, cwd, sizeof(cwd)), SyscallSucceeds());
+ std::string expected_cwd = "(unreachable)";
+ expected_cwd += initial_cwd;
+ EXPECT_STREQ(cwd, expected_cwd.c_str());
+
+ // Should not be able to stat file by its full path.
+ struct stat statbuf;
+ EXPECT_THAT(stat(file_in_new_root.path().c_str(), &statbuf),
+ SyscallFailsWithErrno(ENOENT));
+
+ // Should be able to stat file at new rooted path.
+ auto basename = std::string(Basename(file_in_new_root.path()));
+ auto rootedFile = "/" + basename;
+ ASSERT_THAT(stat(rootedFile.c_str(), &statbuf), SyscallSucceeds());
+
+ // Should be able to stat cwd at '.' even though it's outside root.
+ ASSERT_THAT(stat(".", &statbuf), SyscallSucceeds());
+
+ // chdir into new root.
+ ASSERT_THAT(chdir("/"), SyscallSucceeds());
+
+ // getcwd should return "/".
+ EXPECT_THAT(syscall(__NR_getcwd, cwd, sizeof(cwd)), SyscallSucceeds());
+ EXPECT_STREQ(cwd, "/");
+
+ // Statting '.', '..', '/', and '/..' all return the same dev and inode.
+ struct stat statbuf_dot;
+ ASSERT_THAT(stat(".", &statbuf_dot), SyscallSucceeds());
+ struct stat statbuf_dotdot;
+ ASSERT_THAT(stat("..", &statbuf_dotdot), SyscallSucceeds());
+ EXPECT_EQ(statbuf_dot.st_dev, statbuf_dotdot.st_dev);
+ EXPECT_EQ(statbuf_dot.st_ino, statbuf_dotdot.st_ino);
+ struct stat statbuf_slash;
+ ASSERT_THAT(stat("/", &statbuf_slash), SyscallSucceeds());
+ EXPECT_EQ(statbuf_dot.st_dev, statbuf_slash.st_dev);
+ EXPECT_EQ(statbuf_dot.st_ino, statbuf_slash.st_ino);
+ struct stat statbuf_slashdotdot;
+ ASSERT_THAT(stat("/..", &statbuf_slashdotdot), SyscallSucceeds());
+ EXPECT_EQ(statbuf_dot.st_dev, statbuf_slashdotdot.st_dev);
+ EXPECT_EQ(statbuf_dot.st_ino, statbuf_slashdotdot.st_ino);
+}
+
+TEST(ChrootTest, DotDotFromOpenFD) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_CHROOT)));
+
+ auto dir_outside_root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ auto fd = ASSERT_NO_ERRNO_AND_VALUE(
+ Open(dir_outside_root.path(), O_RDONLY | O_DIRECTORY));
+ auto new_root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+ // chroot into new_root.
+ ASSERT_THAT(chroot(new_root.path().c_str()), SyscallSucceeds());
+
+ // openat on fd with path .. will succeed.
+ int other_fd;
+ ASSERT_THAT(other_fd = openat(fd.get(), "..", O_RDONLY), SyscallSucceeds());
+ EXPECT_THAT(close(other_fd), SyscallSucceeds());
+
+ // getdents on fd should not error.
+ char buf[1024];
+ ASSERT_THAT(syscall(SYS_getdents, fd.get(), buf, sizeof(buf)),
+ SyscallSucceeds());
+}
+
+// Test that link resolution in a chroot can escape the root by following an
+// open proc fd.
+TEST(ChrootTest, ProcFdLinkResolutionInChroot) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_CHROOT)));
+
+ const TempPath file_outside_chroot =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file_outside_chroot.path(), O_RDONLY));
+
+ const FileDescriptor proc_fd = ASSERT_NO_ERRNO_AND_VALUE(
+ Open("/proc", O_DIRECTORY | O_RDONLY | O_CLOEXEC));
+
+ auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ ASSERT_THAT(chroot(temp_dir.path().c_str()), SyscallSucceeds());
+
+ // Opening relative to an already open fd to a node outside the chroot works.
+ const FileDescriptor proc_self_fd = ASSERT_NO_ERRNO_AND_VALUE(
+ OpenAt(proc_fd.get(), "self/fd", O_DIRECTORY | O_RDONLY | O_CLOEXEC));
+
+ // Proc fd symlinks can escape the chroot if the fd the symlink refers to
+ // refers to an object outside the chroot.
+ struct stat s = {};
+ EXPECT_THAT(
+ fstatat(proc_self_fd.get(), absl::StrCat(fd.get()).c_str(), &s, 0),
+ SyscallSucceeds());
+
+ // Try to stat the stdin fd. Internally, this is handled differently from a
+ // proc fd entry pointing to a file, since stdin is backed by a host fd, and
+ // isn't a walkable path on the filesystem inside the sandbox.
+ EXPECT_THAT(fstatat(proc_self_fd.get(), "0", &s, 0), SyscallSucceeds());
+}
+
+// This test will verify that when you hold a fd to proc before entering
+// a chroot that any files inside the chroot will appear rooted to the
+// base chroot when examining /proc/self/fd/{num}.
+TEST(ChrootTest, ProcMemSelfFdsNoEscapeProcOpen) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_CHROOT)));
+
+ // Get a FD to /proc before we enter the chroot.
+ const FileDescriptor proc =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/proc", O_RDONLY));
+
+ // Create and enter a chroot directory.
+ const auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ ASSERT_THAT(chroot(temp_dir.path().c_str()), SyscallSucceeds());
+
+ // Open a file inside the chroot at /foo.
+ const FileDescriptor foo =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/foo", O_CREAT | O_RDONLY, 0644));
+
+ // Examine /proc/self/fd/{foo_fd} to see if it exposes the fact that we're
+ // inside a chroot, the path should be /foo and NOT {chroot_dir}/foo.
+ const std::string fd_path = absl::StrCat("self/fd/", foo.get());
+ char buf[1024] = {};
+ size_t bytes_read = 0;
+ ASSERT_THAT(bytes_read =
+ readlinkat(proc.get(), fd_path.c_str(), buf, sizeof(buf) - 1),
+ SyscallSucceeds());
+
+ // The link should resolve to something.
+ ASSERT_GT(bytes_read, 0);
+
+ // Assert that the link doesn't contain the chroot path and is only /foo.
+ EXPECT_STREQ(buf, "/foo");
+}
+
+// This test will verify that a file inside a chroot when mmapped will not
+// expose the full file path via /proc/self/maps and instead honor the chroot.
+TEST(ChrootTest, ProcMemSelfMapsNoEscapeProcOpen) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_CHROOT)));
+
+ // Get a FD to /proc before we enter the chroot.
+ const FileDescriptor proc =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/proc", O_RDONLY));
+
+ // Create and enter a chroot directory.
+ const auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ ASSERT_THAT(chroot(temp_dir.path().c_str()), SyscallSucceeds());
+
+ // Open a file inside the chroot at /foo.
+ const FileDescriptor foo =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/foo", O_CREAT | O_RDONLY, 0644));
+
+ // Mmap the newly created file.
+ void* foo_map = mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE,
+ foo.get(), 0);
+ ASSERT_THAT(reinterpret_cast<int64_t>(foo_map), SyscallSucceeds());
+
+ // Always unmap.
+ auto cleanup_map = Cleanup(
+ [&] { EXPECT_THAT(munmap(foo_map, kPageSize), SyscallSucceeds()); });
+
+ // Examine /proc/self/maps to be sure that /foo doesn't appear to be
+ // mapped with the full chroot path.
+ const FileDescriptor maps =
+ ASSERT_NO_ERRNO_AND_VALUE(OpenAt(proc.get(), "self/maps", O_RDONLY));
+
+ size_t bytes_read = 0;
+ char buf[8 * 1024] = {};
+ ASSERT_THAT(bytes_read = ReadFd(maps.get(), buf, sizeof(buf)),
+ SyscallSucceeds());
+
+ // The maps file should have something.
+ ASSERT_GT(bytes_read, 0);
+
+ // Finally we want to make sure the maps don't contain the chroot path
+ ASSERT_EQ(std::string(buf, bytes_read).find(temp_dir.path()), std::string::npos);
+}
+
+// Test that mounts outside the chroot will not appear in /proc/self/mounts or
+// /proc/self/mountinfo.
+TEST(ChrootTest, ProcMountsMountinfoNoEscape) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+
+ // We are going to create some mounts and then chroot. In order to be able to
+ // unmount the mounts after the test run, we must chdir to the root and use
+ // relative paths for all mounts. That way, as long as we never chdir into
+ // the new root, we can access the mounts via relative paths and unmount them.
+ ASSERT_THAT(chdir("/"), SyscallSucceeds());
+
+ // Create nested tmpfs mounts. Note the use of relative paths in Mount calls.
+ auto const outer_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ auto const outer_mount = ASSERT_NO_ERRNO_AND_VALUE(Mount(
+ "none", JoinPath(".", outer_dir.path()), "tmpfs", 0, "mode=0700", 0));
+
+ auto const inner_dir =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(outer_dir.path()));
+ auto const inner_mount = ASSERT_NO_ERRNO_AND_VALUE(Mount(
+ "none", JoinPath(".", inner_dir.path()), "tmpfs", 0, "mode=0700", 0));
+
+ // Filenames that will be checked for mounts, all relative to /proc dir.
+ std::string paths[3] = {"mounts", "self/mounts", "self/mountinfo"};
+
+ for (const std::string& path : paths) {
+ // We should have both inner and outer mounts.
+ const std::string contents =
+ ASSERT_NO_ERRNO_AND_VALUE(GetContents(JoinPath("/proc", path)));
+ EXPECT_THAT(contents, AllOf(HasSubstr(outer_dir.path()),
+ HasSubstr(inner_dir.path())));
+ // We better have at least two mounts: the mounts we created plus the root.
+ std::vector<absl::string_view> submounts =
+ absl::StrSplit(contents, '\n', absl::SkipWhitespace());
+ EXPECT_GT(submounts.size(), 2);
+ }
+
+ // Get a FD to /proc before we enter the chroot.
+ const FileDescriptor proc =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/proc", O_RDONLY));
+
+ // Chroot to outer mount.
+ ASSERT_THAT(chroot(outer_dir.path().c_str()), SyscallSucceeds());
+
+ for (const std::string& path : paths) {
+ const FileDescriptor proc_file =
+ ASSERT_NO_ERRNO_AND_VALUE(OpenAt(proc.get(), path, O_RDONLY));
+
+ // Only two mounts visible from this chroot: the inner and outer. Both
+ // paths should be relative to the new chroot.
+ const std::string contents =
+ ASSERT_NO_ERRNO_AND_VALUE(GetContentsFD(proc_file.get()));
+ EXPECT_THAT(contents,
+ AllOf(HasSubstr(absl::StrCat(Basename(inner_dir.path()))),
+ Not(HasSubstr(outer_dir.path())),
+ Not(HasSubstr(inner_dir.path()))));
+ std::vector<absl::string_view> submounts =
+ absl::StrSplit(contents, '\n', absl::SkipWhitespace());
+ EXPECT_EQ(submounts.size(), 2);
+ }
+
+ // Chroot to inner mount. We must use an absolute path accessible to our
+ // chroot.
+ const std::string inner_dir_basename =
+ absl::StrCat("/", Basename(inner_dir.path()));
+ ASSERT_THAT(chroot(inner_dir_basename.c_str()), SyscallSucceeds());
+
+ for (const std::string& path : paths) {
+ const FileDescriptor proc_file =
+ ASSERT_NO_ERRNO_AND_VALUE(OpenAt(proc.get(), path, O_RDONLY));
+ const std::string contents =
+ ASSERT_NO_ERRNO_AND_VALUE(GetContentsFD(proc_file.get()));
+
+ // Only the inner mount visible from this chroot.
+ std::vector<absl::string_view> submounts =
+ absl::StrSplit(contents, '\n', absl::SkipWhitespace());
+ EXPECT_EQ(submounts.size(), 1);
+ }
+
+ // Chroot back to ".".
+ ASSERT_THAT(chroot("."), SyscallSucceeds());
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/clock_getres.cc b/test/syscalls/linux/clock_getres.cc
new file mode 100644
index 000000000..8f8842299
--- /dev/null
+++ b/test/syscalls/linux/clock_getres.cc
@@ -0,0 +1,37 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/time.h>
+#include <time.h>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// clock_getres works regardless of whether or not a timespec is passed.
+TEST(ClockGetres, Timespec) {
+ struct timespec ts;
+ EXPECT_THAT(clock_getres(CLOCK_MONOTONIC, &ts), SyscallSucceeds());
+ EXPECT_THAT(clock_getres(CLOCK_MONOTONIC, nullptr), SyscallSucceeds());
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/clock_gettime.cc b/test/syscalls/linux/clock_gettime.cc
new file mode 100644
index 000000000..5003928be
--- /dev/null
+++ b/test/syscalls/linux/clock_gettime.cc
@@ -0,0 +1,156 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <pthread.h>
+#include <sys/time.h>
+#include <cerrno>
+#include <cstdint>
+#include <ctime>
+#include <list>
+#include <memory>
+#include <string>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+int64_t clock_gettime_nsecs(clockid_t id) {
+ struct timespec ts;
+ TEST_PCHECK(clock_gettime(id, &ts) == 0);
+ return (ts.tv_sec * 1000000000 + ts.tv_nsec);
+}
+
+// Spin on the CPU for at least ns nanoseconds, based on
+// CLOCK_THREAD_CPUTIME_ID.
+void spin_ns(int64_t ns) {
+ int64_t start = clock_gettime_nsecs(CLOCK_THREAD_CPUTIME_ID);
+ int64_t end = start + ns;
+
+ do {
+ constexpr int kLoopCount = 1000000; // large and arbitrary
+ // volatile to prevent the compiler from skipping this loop.
+ for (volatile int i = 0; i < kLoopCount; i++) {
+ }
+ } while (clock_gettime_nsecs(CLOCK_THREAD_CPUTIME_ID) < end);
+}
+
+// Test that CLOCK_PROCESS_CPUTIME_ID is a superset of CLOCK_THREAD_CPUTIME_ID.
+TEST(ClockGettime, CputimeId) {
+ constexpr int kNumThreads = 13; // arbitrary
+
+ absl::Duration spin_time = absl::Seconds(1);
+
+ // Start off the worker threads and compute the aggregate time spent by
+ // the workers. Note that we test CLOCK_PROCESS_CPUTIME_ID by having the
+ // workers execute in parallel and verifying that CLOCK_PROCESS_CPUTIME_ID
+ // accumulates the runtime of all threads.
+ int64_t start = clock_gettime_nsecs(CLOCK_PROCESS_CPUTIME_ID);
+
+ // Create a kNumThreads threads.
+ std::list<ScopedThread> threads;
+ for (int i = 0; i < kNumThreads; i++) {
+ threads.emplace_back(
+ [spin_time] { spin_ns(absl::ToInt64Nanoseconds(spin_time)); });
+ }
+ for (auto& t : threads) {
+ t.Join();
+ }
+
+ int64_t end = clock_gettime_nsecs(CLOCK_PROCESS_CPUTIME_ID);
+
+ // The aggregate time spent in the worker threads must be at least
+ // 'kNumThreads' times the time each thread spun.
+ ASSERT_GE(end - start, kNumThreads * absl::ToInt64Nanoseconds(spin_time));
+}
+
+TEST(ClockGettime, JavaThreadTime) {
+ clockid_t clockid;
+ ASSERT_EQ(0, pthread_getcpuclockid(pthread_self(), &clockid));
+ struct timespec tp;
+ ASSERT_THAT(clock_getres(clockid, &tp), SyscallSucceeds());
+ ASSERT_THAT(clock_gettime(clockid, &tp), SyscallSucceeds());
+ EXPECT_TRUE(tp.tv_sec > 0 || tp.tv_nsec > 0);
+}
+
+// There is not much to test here, since CLOCK_REALTIME may be discontiguous.
+TEST(ClockGettime, RealtimeWorks) {
+ struct timespec tp;
+ EXPECT_THAT(clock_gettime(CLOCK_REALTIME, &tp), SyscallSucceeds());
+}
+
+class MonotonicClockTest : public ::testing::TestWithParam<clockid_t> {};
+
+TEST_P(MonotonicClockTest, IsMonotonic) {
+ auto end = absl::Now() + absl::Seconds(5);
+
+ struct timespec tp;
+ EXPECT_THAT(clock_gettime(GetParam(), &tp), SyscallSucceeds());
+
+ auto prev = absl::TimeFromTimespec(tp);
+ while (absl::Now() < end) {
+ EXPECT_THAT(clock_gettime(GetParam(), &tp), SyscallSucceeds());
+ auto now = absl::TimeFromTimespec(tp);
+ EXPECT_GE(now, prev);
+ prev = now;
+ }
+}
+
+std::string PrintClockId(::testing::TestParamInfo<clockid_t> info) {
+ switch (info.param) {
+ case CLOCK_MONOTONIC:
+ return "CLOCK_MONOTONIC";
+ case CLOCK_MONOTONIC_COARSE:
+ return "CLOCK_MONOTONIC_COARSE";
+ case CLOCK_MONOTONIC_RAW:
+ return "CLOCK_MONOTONIC_RAW";
+ default:
+ return absl::StrCat(info.param);
+ }
+}
+
+INSTANTIATE_TEST_CASE_P(ClockGettime, MonotonicClockTest,
+ ::testing::Values(CLOCK_MONOTONIC,
+ CLOCK_MONOTONIC_COARSE,
+ CLOCK_MONOTONIC_RAW),
+ PrintClockId);
+
+TEST(ClockGettime, UnimplementedReturnsEINVAL) {
+ SKIP_IF(!IsRunningOnGvisor());
+
+ struct timespec tp;
+ EXPECT_THAT(clock_gettime(CLOCK_BOOTTIME, &tp),
+ SyscallFailsWithErrno(EINVAL));
+ EXPECT_THAT(clock_gettime(CLOCK_REALTIME_ALARM, &tp),
+ SyscallFailsWithErrno(EINVAL));
+ EXPECT_THAT(clock_gettime(CLOCK_BOOTTIME_ALARM, &tp),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(ClockGettime, InvalidClockIDReturnsEINVAL) {
+ struct timespec tp;
+ EXPECT_THAT(clock_gettime(-1, &tp), SyscallFailsWithErrno(EINVAL));
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/clock_nanosleep.cc b/test/syscalls/linux/clock_nanosleep.cc
new file mode 100644
index 000000000..96bb961b4
--- /dev/null
+++ b/test/syscalls/linux/clock_nanosleep.cc
@@ -0,0 +1,153 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <time.h>
+
+#include <atomic>
+#include <utility>
+
+#include "gtest/gtest.h"
+#include "absl/time/time.h"
+#include "test/util/cleanup.h"
+#include "test/util/posix_error.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+#include "test/util/timer_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// sys_clock_nanosleep is defined because the glibc clock_nanosleep returns
+// error numbers directly and does not set errno. This makes our Syscall
+// matchers look a little weird when expecting failure:
+// "SyscallSucceedsWithValue(ERRNO)".
+int sys_clock_nanosleep(clockid_t clkid, int flags,
+ const struct timespec* request,
+ struct timespec* remain) {
+ return syscall(SYS_clock_nanosleep, clkid, flags, request, remain);
+}
+
+PosixErrorOr<absl::Time> GetTime(clockid_t clk) {
+ struct timespec ts = {};
+ int rc = clock_gettime(clk, &ts);
+ MaybeSave();
+ if (rc < 0) {
+ return PosixError(errno, "clock_gettime");
+ }
+ return absl::TimeFromTimespec(ts);
+}
+
+class WallClockNanosleepTest : public ::testing::TestWithParam<clockid_t> {};
+
+TEST_P(WallClockNanosleepTest, InvalidValues) {
+ const struct timespec invalid[] = {
+ {.tv_sec = -1, .tv_nsec = -1}, {.tv_sec = 0, .tv_nsec = INT32_MIN},
+ {.tv_sec = 0, .tv_nsec = INT32_MAX}, {.tv_sec = 0, .tv_nsec = -1},
+ {.tv_sec = -1, .tv_nsec = 0},
+ };
+
+ for (auto const ts : invalid) {
+ EXPECT_THAT(sys_clock_nanosleep(GetParam(), 0, &ts, nullptr),
+ SyscallFailsWithErrno(EINVAL));
+ }
+}
+
+TEST_P(WallClockNanosleepTest, SleepOneSecond) {
+ absl::Duration const duration = absl::Seconds(1);
+ struct timespec dur = absl::ToTimespec(duration);
+
+ absl::Time const before = ASSERT_NO_ERRNO_AND_VALUE(GetTime(GetParam()));
+ EXPECT_THAT(RetryEINTR(sys_clock_nanosleep)(GetParam(), 0, &dur, &dur),
+ SyscallSucceeds());
+ absl::Time const after = ASSERT_NO_ERRNO_AND_VALUE(GetTime(GetParam()));
+
+ EXPECT_GE(after - before, duration);
+}
+
+TEST_P(WallClockNanosleepTest, InterruptedNanosleep) {
+ absl::Duration const duration = absl::Seconds(60);
+ struct timespec dur = absl::ToTimespec(duration);
+
+ // Install no-op signal handler for SIGALRM.
+ struct sigaction sa = {};
+ sigfillset(&sa.sa_mask);
+ sa.sa_handler = +[](int signo) {};
+ auto const cleanup_sa =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGALRM, sa));
+
+ // Measure time since setting the alarm, since the alarm will interrupt the
+ // sleep and hence determine how long we sleep.
+ absl::Time const before = ASSERT_NO_ERRNO_AND_VALUE(GetTime(GetParam()));
+
+ // Set an alarm to go off while sleeping.
+ struct itimerval timer = {};
+ timer.it_value.tv_sec = 1;
+ timer.it_value.tv_usec = 0;
+ timer.it_interval.tv_sec = 1;
+ timer.it_interval.tv_usec = 0;
+ auto const cleanup =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedItimer(ITIMER_REAL, timer));
+
+ EXPECT_THAT(sys_clock_nanosleep(GetParam(), 0, &dur, &dur),
+ SyscallFailsWithErrno(EINTR));
+ absl::Time const after = ASSERT_NO_ERRNO_AND_VALUE(GetTime(GetParam()));
+
+ absl::Duration const remaining = absl::DurationFromTimespec(dur);
+ EXPECT_GE(after - before + remaining, duration);
+}
+
+TEST_P(WallClockNanosleepTest, SleepUntil) {
+ absl::Time const now = ASSERT_NO_ERRNO_AND_VALUE(GetTime(GetParam()));
+ absl::Time const until = now + absl::Seconds(2);
+ struct timespec ts = absl::ToTimespec(until);
+
+ EXPECT_THAT(
+ RetryEINTR(sys_clock_nanosleep)(GetParam(), TIMER_ABSTIME, &ts, nullptr),
+ SyscallSucceeds());
+ absl::Time const after = ASSERT_NO_ERRNO_AND_VALUE(GetTime(GetParam()));
+
+ EXPECT_GE(after, until);
+}
+
+INSTANTIATE_TEST_CASE_P(Sleepers, WallClockNanosleepTest,
+ ::testing::Values(CLOCK_REALTIME, CLOCK_MONOTONIC));
+
+TEST(ClockNanosleepProcessTest, SleepFiveSeconds) {
+ absl::Duration const kDuration = absl::Seconds(5);
+ struct timespec dur = absl::ToTimespec(kDuration);
+
+ // Ensure that CLOCK_PROCESS_CPUTIME_ID advances.
+ std::atomic<bool> done(false);
+ ScopedThread t([&] {
+ while (!done.load()) {
+ }
+ });
+ auto const cleanup_done = Cleanup([&] { done.store(true); });
+
+ absl::Time const before =
+ ASSERT_NO_ERRNO_AND_VALUE(GetTime(CLOCK_PROCESS_CPUTIME_ID));
+ EXPECT_THAT(
+ RetryEINTR(sys_clock_nanosleep)(CLOCK_PROCESS_CPUTIME_ID, 0, &dur, &dur),
+ SyscallSucceeds());
+ absl::Time const after =
+ ASSERT_NO_ERRNO_AND_VALUE(GetTime(CLOCK_PROCESS_CPUTIME_ID));
+ EXPECT_GE(after - before, kDuration);
+}
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/concurrency.cc b/test/syscalls/linux/concurrency.cc
new file mode 100644
index 000000000..2c13b315c
--- /dev/null
+++ b/test/syscalls/linux/concurrency.cc
@@ -0,0 +1,124 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <signal.h>
+#include <atomic>
+
+#include "gtest/gtest.h"
+#include "absl/strings/string_view.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+// Test that a thread that never yields to the OS does not prevent other threads
+// from running.
+TEST(ConcurrencyTest, SingleProcessMultithreaded) {
+ std::atomic<int> a(0);
+
+ ScopedThread t([&a]() {
+ while (!a.load()) {
+ }
+ });
+
+ absl::SleepFor(absl::Seconds(1));
+
+ // We are still able to execute code in this thread. The other hasn't
+ // permanently hung execution in both threads.
+ a.store(1);
+}
+
+// Test that multiple threads in this process continue to execute in parallel,
+// even if an unrelated second process is spawned.
+TEST(ConcurrencyTest, MultiProcessMultithreaded) {
+ // In PID 1, start TIDs 1 and 2, and put both to sleep.
+ //
+ // Start PID 3, which spins for 5 seconds, then exits.
+ //
+ // TIDs 1 and 2 wake and attempt to Activate, which cannot occur until PID 3
+ // exits.
+ //
+ // Both TIDs 1 and 2 should be woken. If they are not both woken, the test
+ // hangs.
+ //
+ // This is all fundamentally racy. If we are failing to wake all threads, the
+ // expectation is that this test becomes flaky, rather than consistently
+ // failing.
+ //
+ // If additional background threads fail to block, we may never schedule the
+ // child, at which point this test effectively becomes
+ // MultiProcessConcurrency. That's not expected to occur.
+
+ std::atomic<int> a(0);
+ ScopedThread t([&a]() {
+ // Block so that PID 3 can execute and we can wait on its exit.
+ absl::SleepFor(absl::Seconds(1));
+ while (!a.load()) {
+ }
+ });
+
+ pid_t child_pid;
+ ASSERT_THAT(child_pid = fork(), SyscallSucceeds());
+ if (child_pid == 0) {
+ // Busy wait without making any blocking syscalls.
+ auto end = absl::Now() + absl::Seconds(5);
+ while (absl::Now() < end) {
+ }
+ _exit(0);
+ }
+
+ absl::SleepFor(absl::Seconds(1));
+
+ // If only TID 1 is woken, thread.Join will hang.
+ // If only TID 2 is woken, both will hang.
+ a.store(1);
+ t.Join();
+
+ int status = 0;
+ EXPECT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+ EXPECT_TRUE(WIFEXITED(status));
+ EXPECT_EQ(WEXITSTATUS(status), 0);
+}
+
+// Test that multiple processes can execute concurrently, even if one process
+// never yields.
+TEST(ConcurrencyTest, MultiProcessConcurrency) {
+
+ pid_t child_pid;
+ ASSERT_THAT(child_pid = fork(), SyscallSucceeds());
+ if (child_pid == 0) {
+ while (true) {
+ }
+ __builtin_unreachable();
+ }
+
+ absl::SleepFor(absl::Seconds(5));
+
+ // We are still able to execute code in this process. The other hasn't
+ // permanently hung execution in both processes.
+ ASSERT_THAT(kill(child_pid, SIGKILL), SyscallSucceeds());
+ int status = 0;
+
+ ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_EQ(WTERMSIG(status), SIGKILL);
+}
+
+} // namespace
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/creat.cc b/test/syscalls/linux/creat.cc
new file mode 100644
index 000000000..72a016b4c
--- /dev/null
+++ b/test/syscalls/linux/creat.cc
@@ -0,0 +1,57 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <string>
+
+#include "gtest/gtest.h"
+#include "test/util/fs_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+constexpr int kMode = 0666;
+
+TEST(CreatTest, CreatCreatesNewFile) {
+ std::string const path = NewTempAbsPath();
+ struct stat buf;
+ int fd;
+ ASSERT_THAT(stat(path.c_str(), &buf), SyscallFailsWithErrno(ENOENT));
+ ASSERT_THAT(fd = creat(path.c_str(), kMode), SyscallSucceeds());
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+ EXPECT_THAT(stat(path.c_str(), &buf), SyscallSucceeds());
+}
+
+TEST(CreatTest, CreatTruncatesExistingFile) {
+ auto temp_path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ int fd;
+ ASSERT_NO_ERRNO(SetContents(temp_path.path(), "non-empty"));
+ ASSERT_THAT(fd = creat(temp_path.path().c_str(), kMode), SyscallSucceeds());
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+ std::string new_contents;
+ ASSERT_NO_ERRNO(GetContents(temp_path.path(), &new_contents));
+ EXPECT_EQ("", new_contents);
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/dev.cc b/test/syscalls/linux/dev.cc
new file mode 100644
index 000000000..a140d3b30
--- /dev/null
+++ b/test/syscalls/linux/dev.cc
@@ -0,0 +1,149 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(DevTest, LseekDevUrandom) {
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/urandom", O_RDONLY));
+ EXPECT_THAT(lseek(fd.get(), -10, SEEK_CUR), SyscallSucceeds());
+ EXPECT_THAT(lseek(fd.get(), -10, SEEK_SET), SyscallSucceeds());
+ EXPECT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceeds());
+}
+
+TEST(DevTest, LseekDevNull) {
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/null", O_RDONLY));
+ EXPECT_THAT(lseek(fd.get(), -10, SEEK_CUR), SyscallSucceeds());
+ EXPECT_THAT(lseek(fd.get(), -10, SEEK_SET), SyscallSucceeds());
+ EXPECT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceeds());
+ EXPECT_THAT(lseek(fd.get(), 0, SEEK_END), SyscallSucceeds());
+}
+
+TEST(DevTest, LseekDevZero) {
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDONLY));
+ EXPECT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceeds());
+ EXPECT_THAT(lseek(fd.get(), 0, SEEK_END), SyscallSucceeds());
+}
+
+TEST(DevTest, LseekDevFull) {
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/full", O_RDONLY));
+ EXPECT_THAT(lseek(fd.get(), 123, SEEK_SET), SyscallSucceedsWithValue(0));
+ EXPECT_THAT(lseek(fd.get(), 123, SEEK_CUR), SyscallSucceedsWithValue(0));
+ EXPECT_THAT(lseek(fd.get(), 123, SEEK_END), SyscallSucceedsWithValue(0));
+}
+
+TEST(DevTest, LseekDevNullFreshFile) {
+ // Seeks to /dev/null always return 0.
+ const FileDescriptor fd1 =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/null", O_RDONLY));
+ const FileDescriptor fd2 =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/null", O_RDONLY));
+
+ EXPECT_THAT(lseek(fd1.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0));
+ EXPECT_THAT(lseek(fd1.get(), 1000, SEEK_CUR), SyscallSucceedsWithValue(0));
+ EXPECT_THAT(lseek(fd2.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0));
+
+ const FileDescriptor fd3 =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/null", O_RDONLY));
+ EXPECT_THAT(lseek(fd3.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0));
+}
+
+TEST(DevTest, OpenTruncate) {
+ // Truncation is ignored on linux and gvisor for device files.
+ ASSERT_NO_ERRNO_AND_VALUE(
+ Open("/dev/null", O_CREAT | O_TRUNC | O_WRONLY, 0644));
+ ASSERT_NO_ERRNO_AND_VALUE(
+ Open("/dev/zero", O_CREAT | O_TRUNC | O_WRONLY, 0644));
+ ASSERT_NO_ERRNO_AND_VALUE(
+ Open("/dev/full", O_CREAT | O_TRUNC | O_WRONLY, 0644));
+}
+
+TEST(DevTest, Pread64DevNull) {
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/null", O_RDONLY));
+ char buf[1];
+ EXPECT_THAT(pread64(fd.get(), buf, 1, 0), SyscallSucceedsWithValue(0));
+}
+
+TEST(DevTest, Pread64DevZero) {
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDONLY));
+ char buf[1];
+ EXPECT_THAT(pread64(fd.get(), buf, 1, 0), SyscallSucceedsWithValue(1));
+}
+
+TEST(DevTest, Pread64DevFull) {
+ // /dev/full behaves like /dev/zero with respect to reads.
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/full", O_RDONLY));
+ char buf[1];
+ EXPECT_THAT(pread64(fd.get(), buf, 1, 0), SyscallSucceedsWithValue(1));
+}
+
+TEST(DevTest, ReadDevNull) {
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/null", O_RDONLY));
+ std::vector<char> buf(1);
+ EXPECT_THAT(ReadFd(fd.get(), buf.data(), 1), SyscallSucceeds());
+}
+
+// Do not allow random save as it could lead to partial reads.
+TEST(DevTest, ReadDevZero_NoRandomSave) {
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDONLY));
+
+ constexpr int kReadSize = 128 * 1024;
+ std::vector<char> buf(kReadSize, 1);
+ EXPECT_THAT(ReadFd(fd.get(), buf.data(), kReadSize),
+ SyscallSucceedsWithValue(kReadSize));
+ EXPECT_EQ(std::vector<char>(kReadSize, 0), buf);
+}
+
+TEST(DevTest, WriteDevNull) {
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/null", O_WRONLY));
+ EXPECT_THAT(WriteFd(fd.get(), "a", 1), SyscallSucceedsWithValue(1));
+}
+
+TEST(DevTest, WriteDevZero) {
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_WRONLY));
+ EXPECT_THAT(WriteFd(fd.get(), "a", 1), SyscallSucceedsWithValue(1));
+}
+
+TEST(DevTest, WriteDevFull) {
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/full", O_WRONLY));
+ EXPECT_THAT(WriteFd(fd.get(), "a", 1), SyscallFailsWithErrno(ENOSPC));
+}
+
+} // namespace
+} // namespace testing
+
+} // namespace gvisor
diff --git a/test/syscalls/linux/dup.cc b/test/syscalls/linux/dup.cc
new file mode 100644
index 000000000..fc11844fb
--- /dev/null
+++ b/test/syscalls/linux/dup.cc
@@ -0,0 +1,139 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <sys/eventfd.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/posix_error.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+PosixErrorOr<FileDescriptor> Dup2(const FileDescriptor& fd, int target_fd) {
+ int new_fd = dup2(fd.get(), target_fd);
+ if (new_fd < 0) {
+ return PosixError(errno, "Dup2");
+ }
+ return FileDescriptor(new_fd);
+}
+
+PosixErrorOr<FileDescriptor> Dup3(const FileDescriptor& fd, int target_fd,
+ int flags) {
+ int new_fd = dup3(fd.get(), target_fd, flags);
+ if (new_fd < 0) {
+ return PosixError(errno, "Dup2");
+ }
+ return FileDescriptor(new_fd);
+}
+
+void CheckSameFile(const FileDescriptor& fd1, const FileDescriptor& fd2) {
+ struct stat stat_result1, stat_result2;
+ ASSERT_THAT(fstat(fd1.get(), &stat_result1), SyscallSucceeds());
+ ASSERT_THAT(fstat(fd2.get(), &stat_result2), SyscallSucceeds());
+ EXPECT_EQ(stat_result1.st_dev, stat_result2.st_dev);
+ EXPECT_EQ(stat_result1.st_ino, stat_result2.st_ino);
+}
+
+TEST(DupTest, Dup) {
+ auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDONLY));
+
+ // Dup the descriptor and make sure it's the same file.
+ FileDescriptor nfd = ASSERT_NO_ERRNO_AND_VALUE(fd.Dup());
+ ASSERT_NE(fd.get(), nfd.get());
+ CheckSameFile(fd, nfd);
+}
+
+TEST(DupTest, DupClearsCloExec) {
+ FileDescriptor nfd;
+
+ // Open an eventfd file descriptor with FD_CLOEXEC descriptor flag set.
+ int event_fd = 0;
+ ASSERT_THAT(event_fd = eventfd(0, EFD_CLOEXEC), SyscallSucceeds());
+ FileDescriptor event_fd_closer(event_fd);
+
+ EXPECT_THAT(fcntl(event_fd_closer.get(), F_GETFD),
+ SyscallSucceedsWithValue(FD_CLOEXEC));
+
+ // Duplicate the descriptor. Ensure that it doesn't have FD_CLOEXEC set.
+ nfd = ASSERT_NO_ERRNO_AND_VALUE(event_fd_closer.Dup());
+ ASSERT_NE(event_fd_closer.get(), nfd.get());
+ CheckSameFile(event_fd_closer, nfd);
+ EXPECT_THAT(fcntl(nfd.get(), F_GETFD), SyscallSucceedsWithValue(0));
+}
+
+TEST(DupTest, Dup2) {
+ auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDONLY));
+
+ // Regular dup once.
+ FileDescriptor nfd = ASSERT_NO_ERRNO_AND_VALUE(fd.Dup());
+
+ ASSERT_NE(fd.get(), nfd.get());
+ CheckSameFile(fd, nfd);
+
+ // Dup over the file above.
+ int target_fd = nfd.release();
+ FileDescriptor nfd2 = ASSERT_NO_ERRNO_AND_VALUE(Dup2(fd, target_fd));
+ EXPECT_EQ(target_fd, nfd2.get());
+ CheckSameFile(fd, nfd2);
+}
+
+TEST(DupTest, Dup2SameFD) {
+ auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDONLY));
+
+ // Should succeed.
+ ASSERT_THAT(dup2(fd.get(), fd.get()), SyscallSucceedsWithValue(fd.get()));
+}
+
+TEST(DupTest, Dup3) {
+ auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDONLY));
+
+ // Regular dup once.
+ FileDescriptor nfd = ASSERT_NO_ERRNO_AND_VALUE(fd.Dup());
+ ASSERT_NE(fd.get(), nfd.get());
+ CheckSameFile(fd, nfd);
+
+ // Dup over the file above, check that it has no CLOEXEC.
+ nfd = ASSERT_NO_ERRNO_AND_VALUE(Dup3(fd, nfd.release(), 0));
+ CheckSameFile(fd, nfd);
+ EXPECT_THAT(fcntl(nfd.get(), F_GETFD), SyscallSucceedsWithValue(0));
+
+ // Dup over the file again, check that it does not CLOEXEC.
+ nfd = ASSERT_NO_ERRNO_AND_VALUE(Dup3(fd, nfd.release(), O_CLOEXEC));
+ CheckSameFile(fd, nfd);
+ EXPECT_THAT(fcntl(nfd.get(), F_GETFD), SyscallSucceedsWithValue(FD_CLOEXEC));
+}
+
+TEST(DupTest, Dup3FailsSameFD) {
+ auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDONLY));
+
+ // Only dup3 fails if the new and old fd are the same.
+ ASSERT_THAT(dup3(fd.get(), fd.get(), 0), SyscallFailsWithErrno(EINVAL));
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/epoll.cc b/test/syscalls/linux/epoll.cc
new file mode 100644
index 000000000..9ae87c00b
--- /dev/null
+++ b/test/syscalls/linux/epoll.cc
@@ -0,0 +1,468 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <limits.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/eventfd.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/posix_error.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+constexpr int kFDsPerEpoll = 3;
+constexpr uint64_t kMagicConstant = 0x0102030405060708;
+
+// Returns a new epoll file descriptor.
+PosixErrorOr<FileDescriptor> NewEpollFD() {
+ // "Since Linux 2.6.8, the size argument is ignored, but must be greater than
+ // zero." - epoll_create(2)
+ int fd = epoll_create(/* size = */ 1);
+ MaybeSave();
+ if (fd < 0) {
+ return PosixError(errno, "epoll_create");
+ }
+ return FileDescriptor(fd);
+}
+
+// Returns a new eventfd.
+PosixErrorOr<FileDescriptor> NewEventFD() {
+ int fd = eventfd(/* initval = */ 0, /* flags = */ 0);
+ MaybeSave();
+ if (fd < 0) {
+ return PosixError(errno, "eventfd");
+ }
+ return FileDescriptor(fd);
+}
+
+// Registers `target_fd` with the epoll instance represented by `epoll_fd` for
+// the epoll events `events`. Events on `target_fd` will be indicated by setting
+// data.u64 to `data` in the returned epoll_event.
+PosixError RegisterEpollFD(int epoll_fd, int target_fd, int events,
+ uint64_t data) {
+ struct epoll_event event;
+ event.events = events;
+ event.data.u64 = data;
+ int rc = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, target_fd, &event);
+ MaybeSave();
+ if (rc < 0) {
+ return PosixError(errno, "epoll_ctl");
+ }
+ return NoError();
+}
+
+uint64_t ms_elapsed(const struct timespec* begin, const struct timespec* end) {
+ return (end->tv_sec - begin->tv_sec) * 1000 +
+ (end->tv_nsec - begin->tv_nsec) / 1000000;
+}
+
+TEST(EpollTest, AllWritable) {
+ auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD());
+ std::vector<FileDescriptor> eventfds;
+ for (int i = 0; i < kFDsPerEpoll; i++) {
+ eventfds.push_back(ASSERT_NO_ERRNO_AND_VALUE(NewEventFD()));
+ ASSERT_NO_ERRNO(RegisterEpollFD(epollfd.get(), eventfds[i].get(),
+ EPOLLIN | EPOLLOUT, kMagicConstant + i));
+ }
+
+ struct epoll_event result[kFDsPerEpoll];
+ ASSERT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, -1),
+ SyscallSucceedsWithValue(kFDsPerEpoll));
+ // TODO: Why do some tests check epoll_event::data, and others
+ // don't? Does Linux actually guarantee that, in any of these test cases,
+ // epoll_wait will necessarily write out the epoll_events in the order that
+ // they were registered?
+ for (int i = 0; i < kFDsPerEpoll; i++) {
+ ASSERT_EQ(result[i].events, EPOLLOUT);
+ }
+}
+
+TEST(EpollTest, LastReadable) {
+ auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD());
+ std::vector<FileDescriptor> eventfds;
+ for (int i = 0; i < kFDsPerEpoll; i++) {
+ eventfds.push_back(ASSERT_NO_ERRNO_AND_VALUE(NewEventFD()));
+ ASSERT_NO_ERRNO(RegisterEpollFD(epollfd.get(), eventfds[i].get(),
+ EPOLLIN | EPOLLOUT, kMagicConstant + i));
+ }
+
+ uint64_t tmp = 1;
+ ASSERT_THAT(WriteFd(eventfds[kFDsPerEpoll - 1].get(), &tmp, sizeof(tmp)),
+ SyscallSucceedsWithValue(sizeof(tmp)));
+
+ struct epoll_event result[kFDsPerEpoll];
+ ASSERT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, -1),
+ SyscallSucceedsWithValue(kFDsPerEpoll));
+
+ int i;
+ for (i = 0; i < kFDsPerEpoll - 1; i++) {
+ EXPECT_EQ(result[i].events, EPOLLOUT);
+ }
+ EXPECT_EQ(result[i].events, EPOLLOUT | EPOLLIN);
+ EXPECT_EQ(result[i].data.u64, kMagicConstant + i);
+}
+
+TEST(EpollTest, LastNonWritable) {
+ auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD());
+ std::vector<FileDescriptor> eventfds;
+ for (int i = 0; i < kFDsPerEpoll; i++) {
+ eventfds.push_back(ASSERT_NO_ERRNO_AND_VALUE(NewEventFD()));
+ ASSERT_NO_ERRNO(RegisterEpollFD(epollfd.get(), eventfds[i].get(),
+ EPOLLIN | EPOLLOUT, kMagicConstant + i));
+ }
+
+ // Write the maximum value to the event fd so that writing to it again would
+ // block.
+ uint64_t tmp = ULLONG_MAX - 1;
+ ASSERT_THAT(WriteFd(eventfds[kFDsPerEpoll - 1].get(), &tmp, sizeof(tmp)),
+ SyscallSucceedsWithValue(sizeof(tmp)));
+
+ struct epoll_event result[kFDsPerEpoll];
+ ASSERT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, -1),
+ SyscallSucceedsWithValue(kFDsPerEpoll));
+
+ int i;
+ for (i = 0; i < kFDsPerEpoll - 1; i++) {
+ EXPECT_EQ(result[i].events, EPOLLOUT);
+ }
+ EXPECT_EQ(result[i].events, EPOLLIN);
+ EXPECT_THAT(ReadFd(eventfds[kFDsPerEpoll - 1].get(), &tmp, sizeof(tmp)),
+ sizeof(tmp));
+ EXPECT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, -1),
+ SyscallSucceedsWithValue(kFDsPerEpoll));
+
+ for (i = 0; i < kFDsPerEpoll; i++) {
+ EXPECT_EQ(result[i].events, EPOLLOUT);
+ }
+}
+
+TEST(EpollTest, Timeout_NoRandomSave) {
+ auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD());
+ std::vector<FileDescriptor> eventfds;
+ for (int i = 0; i < kFDsPerEpoll; i++) {
+ eventfds.push_back(ASSERT_NO_ERRNO_AND_VALUE(NewEventFD()));
+ ASSERT_NO_ERRNO(RegisterEpollFD(epollfd.get(), eventfds[i].get(), EPOLLIN,
+ kMagicConstant + i));
+ }
+
+ constexpr int kTimeoutMs = 200;
+ struct timespec begin;
+ struct timespec end;
+ struct epoll_event result[kFDsPerEpoll];
+
+ {
+ const DisableSave ds; // Timing-related.
+ EXPECT_THAT(clock_gettime(CLOCK_MONOTONIC, &begin), SyscallSucceeds());
+
+ ASSERT_THAT(
+ RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, kTimeoutMs),
+ SyscallSucceedsWithValue(0));
+ EXPECT_THAT(clock_gettime(CLOCK_MONOTONIC, &end), SyscallSucceeds());
+ }
+
+ // Check the lower bound on the timeout. Checking for an upper bound is
+ // fragile because Linux can overrun the timeout due to scheduling delays.
+ EXPECT_GT(ms_elapsed(&begin, &end), kTimeoutMs - 1);
+}
+
+void* writer(void* arg) {
+ int fd = *reinterpret_cast<int*>(arg);
+ uint64_t tmp = 1;
+
+ usleep(200000);
+ if (WriteFd(fd, &tmp, sizeof(tmp)) != sizeof(tmp)) {
+ fprintf(stderr, "writer failed: errno %s\n", strerror(errno));
+ }
+
+ return nullptr;
+}
+
+TEST(EpollTest, WaitThenUnblock) {
+ auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD());
+ std::vector<FileDescriptor> eventfds;
+ for (int i = 0; i < kFDsPerEpoll; i++) {
+ eventfds.push_back(ASSERT_NO_ERRNO_AND_VALUE(NewEventFD()));
+ ASSERT_NO_ERRNO(RegisterEpollFD(epollfd.get(), eventfds[i].get(), EPOLLIN,
+ kMagicConstant + i));
+ }
+
+ // Fire off a thread that will make at least one of the event fds readable.
+ pthread_t thread;
+ int make_readable = eventfds[0].get();
+ ASSERT_THAT(pthread_create(&thread, nullptr, writer, &make_readable),
+ SyscallSucceedsWithValue(0));
+
+ struct epoll_event result[kFDsPerEpoll];
+ EXPECT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, -1),
+ SyscallSucceedsWithValue(1));
+ EXPECT_THAT(pthread_detach(thread), SyscallSucceeds());
+}
+
+void sighandler(int s) {}
+
+void* signaler(void* arg) {
+ pthread_t* t = reinterpret_cast<pthread_t*>(arg);
+ // Repeatedly send the real-time signal until we are detached, because it's
+ // difficult to know exactly when epoll_wait on another thread (which this
+ // is intending to interrupt) has started blocking.
+ while (1) {
+ usleep(200000);
+ pthread_kill(*t, SIGRTMIN);
+ }
+ return nullptr;
+}
+
+TEST(EpollTest, UnblockWithSignal) {
+ auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD());
+ std::vector<FileDescriptor> eventfds;
+ for (int i = 0; i < kFDsPerEpoll; i++) {
+ eventfds.push_back(ASSERT_NO_ERRNO_AND_VALUE(NewEventFD()));
+ ASSERT_NO_ERRNO(RegisterEpollFD(epollfd.get(), eventfds[i].get(), EPOLLIN,
+ kMagicConstant + i));
+ }
+
+ signal(SIGRTMIN, sighandler);
+ // Unblock the real time signals that InitGoogle blocks :(
+ sigset_t unblock;
+ sigemptyset(&unblock);
+ sigaddset(&unblock, SIGRTMIN);
+ ASSERT_THAT(sigprocmask(SIG_UNBLOCK, &unblock, nullptr), SyscallSucceeds());
+
+ pthread_t thread;
+ pthread_t cur = pthread_self();
+ ASSERT_THAT(pthread_create(&thread, nullptr, signaler, &cur),
+ SyscallSucceedsWithValue(0));
+
+ struct epoll_event result[kFDsPerEpoll];
+ EXPECT_THAT(epoll_wait(epollfd.get(), result, kFDsPerEpoll, -1),
+ SyscallFailsWithErrno(EINTR));
+ EXPECT_THAT(pthread_cancel(thread), SyscallSucceeds());
+ EXPECT_THAT(pthread_detach(thread), SyscallSucceeds());
+}
+
+TEST(EpollTest, TimeoutNoFds) {
+ auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD());
+ struct epoll_event result[kFDsPerEpoll];
+ EXPECT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, 100),
+ SyscallSucceedsWithValue(0));
+}
+
+struct addr_ctx {
+ int epollfd;
+ int eventfd;
+};
+
+void* fd_adder(void* arg) {
+ struct addr_ctx* actx = reinterpret_cast<struct addr_ctx*>(arg);
+ struct epoll_event event;
+ event.events = EPOLLIN | EPOLLOUT;
+ event.data.u64 = 0xdeadbeeffacefeed;
+
+ usleep(200000);
+ if (epoll_ctl(actx->epollfd, EPOLL_CTL_ADD, actx->eventfd, &event) == -1) {
+ fprintf(stderr, "epoll_ctl failed: %s\n", strerror(errno));
+ }
+
+ return nullptr;
+}
+
+TEST(EpollTest, UnblockWithNewFD) {
+ auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD());
+ auto eventfd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD());
+
+ pthread_t thread;
+ struct addr_ctx actx = {epollfd.get(), eventfd.get()};
+ ASSERT_THAT(pthread_create(&thread, nullptr, fd_adder, &actx),
+ SyscallSucceedsWithValue(0));
+
+ struct epoll_event result[kFDsPerEpoll];
+ // Wait while no FDs are ready, but after 200ms fd_adder will add a ready FD
+ // to epoll which will wake us up.
+ EXPECT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, -1),
+ SyscallSucceedsWithValue(1));
+ EXPECT_THAT(pthread_detach(thread), SyscallSucceeds());
+ EXPECT_EQ(result[0].data.u64, 0xdeadbeeffacefeed);
+}
+
+TEST(EpollTest, Oneshot) {
+ auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD());
+ std::vector<FileDescriptor> eventfds;
+ for (int i = 0; i < kFDsPerEpoll; i++) {
+ eventfds.push_back(ASSERT_NO_ERRNO_AND_VALUE(NewEventFD()));
+ ASSERT_NO_ERRNO(RegisterEpollFD(epollfd.get(), eventfds[i].get(), EPOLLIN,
+ kMagicConstant + i));
+ }
+
+ struct epoll_event event;
+ event.events = EPOLLOUT | EPOLLONESHOT;
+ event.data.u64 = kMagicConstant;
+ ASSERT_THAT(
+ epoll_ctl(epollfd.get(), EPOLL_CTL_MOD, eventfds[0].get(), &event),
+ SyscallSucceeds());
+
+ struct epoll_event result[kFDsPerEpoll];
+ // One-shot entry means that the first epoll_wait should succeed.
+ ASSERT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, -1),
+ SyscallSucceedsWithValue(1));
+ EXPECT_EQ(result[0].data.u64, kMagicConstant);
+
+ // One-shot entry means that the second epoll_wait should timeout.
+ EXPECT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, 100),
+ SyscallSucceedsWithValue(0));
+}
+
+TEST(EpollTest, EdgeTriggered_NoRandomSave) {
+ // Test edge-triggered entry: make it edge-triggered, first wait should
+ // return it, second one should time out, make it writable again, third wait
+ // should return it, fourth wait should timeout.
+ auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD());
+ auto eventfd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD());
+ ASSERT_NO_ERRNO(RegisterEpollFD(epollfd.get(), eventfd.get(),
+ EPOLLOUT | EPOLLET, kMagicConstant));
+
+ struct epoll_event result[kFDsPerEpoll];
+
+ {
+ const DisableSave ds; // May trigger spurious event.
+
+ // Edge-triggered entry means that the first epoll_wait should return the
+ // event.
+ ASSERT_THAT(epoll_wait(epollfd.get(), result, kFDsPerEpoll, -1),
+ SyscallSucceedsWithValue(1));
+ EXPECT_EQ(result[0].data.u64, kMagicConstant);
+
+ // Edge-triggered entry means that the second epoll_wait should time out.
+ ASSERT_THAT(epoll_wait(epollfd.get(), result, kFDsPerEpoll, 100),
+ SyscallSucceedsWithValue(0));
+ }
+
+ uint64_t tmp = ULLONG_MAX - 1;
+
+ // Make an fd non-writable.
+ ASSERT_THAT(WriteFd(eventfd.get(), &tmp, sizeof(tmp)),
+ SyscallSucceedsWithValue(sizeof(tmp)));
+
+ // Make the same fd non-writable to trigger a change, which will trigger an
+ // edge-triggered event.
+ ASSERT_THAT(ReadFd(eventfd.get(), &tmp, sizeof(tmp)),
+ SyscallSucceedsWithValue(sizeof(tmp)));
+
+ {
+ const DisableSave ds; // May trigger spurious event.
+
+ // An edge-triggered event should now be returned.
+ ASSERT_THAT(epoll_wait(epollfd.get(), result, kFDsPerEpoll, -1),
+ SyscallSucceedsWithValue(1));
+ EXPECT_EQ(result[0].data.u64, kMagicConstant);
+
+ // The edge-triggered event had been consumed above, we don't expect to
+ // get it again.
+ ASSERT_THAT(epoll_wait(epollfd.get(), result, kFDsPerEpoll, 100),
+ SyscallSucceedsWithValue(0));
+ }
+}
+
+TEST(EpollTest, OneshotAndEdgeTriggered) {
+ auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD());
+ auto eventfd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD());
+ ASSERT_NO_ERRNO(RegisterEpollFD(epollfd.get(), eventfd.get(),
+ EPOLLOUT | EPOLLET | EPOLLONESHOT,
+ kMagicConstant));
+
+ struct epoll_event result[kFDsPerEpoll];
+ // First time one shot edge-triggered entry means that epoll_wait should
+ // return the event.
+ ASSERT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, -1),
+ SyscallSucceedsWithValue(1));
+ EXPECT_EQ(result[0].data.u64, kMagicConstant);
+
+ // Edge-triggered entry means that the second epoll_wait should time out.
+ ASSERT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, 100),
+ SyscallSucceedsWithValue(0));
+
+ uint64_t tmp = ULLONG_MAX - 1;
+ // Make an fd non-writable.
+ ASSERT_THAT(WriteFd(eventfd.get(), &tmp, sizeof(tmp)),
+ SyscallSucceedsWithValue(sizeof(tmp)));
+ // Make the same fd non-writable to trigger a change, which will not trigger
+ // an edge-triggered event because we've also included EPOLLONESHOT.
+ ASSERT_THAT(ReadFd(eventfd.get(), &tmp, sizeof(tmp)),
+ SyscallSucceedsWithValue(sizeof(tmp)));
+ ASSERT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, 100),
+ SyscallSucceedsWithValue(0));
+}
+
+TEST(EpollTest, CycleOfOneDisallowed) {
+ auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD());
+
+ struct epoll_event event;
+ event.events = EPOLLOUT;
+ event.data.u64 = kMagicConstant;
+
+ ASSERT_THAT(epoll_ctl(epollfd.get(), EPOLL_CTL_ADD, epollfd.get(), &event),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(EpollTest, CycleOfThreeDisallowed) {
+ auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD());
+ auto epollfd1 = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD());
+ auto epollfd2 = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD());
+
+ ASSERT_NO_ERRNO(
+ RegisterEpollFD(epollfd.get(), epollfd1.get(), EPOLLIN, kMagicConstant));
+ ASSERT_NO_ERRNO(
+ RegisterEpollFD(epollfd1.get(), epollfd2.get(), EPOLLIN, kMagicConstant));
+
+ struct epoll_event event;
+ event.events = EPOLLIN;
+ event.data.u64 = kMagicConstant;
+ EXPECT_THAT(epoll_ctl(epollfd2.get(), EPOLL_CTL_ADD, epollfd.get(), &event),
+ SyscallFailsWithErrno(ELOOP));
+}
+
+TEST(EpollTest, CloseFile) {
+ auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD());
+ auto eventfd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD());
+ ASSERT_NO_ERRNO(
+ RegisterEpollFD(epollfd.get(), eventfd.get(), EPOLLOUT, kMagicConstant));
+
+ struct epoll_event result[kFDsPerEpoll];
+ ASSERT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, -1),
+ SyscallSucceedsWithValue(1));
+ EXPECT_EQ(result[0].data.u64, kMagicConstant);
+
+ // Close the event fd early.
+ eventfd.reset();
+
+ EXPECT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, 100),
+ SyscallSucceedsWithValue(0));
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/eventfd.cc b/test/syscalls/linux/eventfd.cc
new file mode 100644
index 000000000..ffcd20622
--- /dev/null
+++ b/test/syscalls/linux/eventfd.cc
@@ -0,0 +1,189 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/eventfd.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(EventfdTest, Nonblock) {
+ int efd;
+ ASSERT_THAT(efd = eventfd(0, EFD_NONBLOCK | EFD_SEMAPHORE),
+ SyscallSucceeds());
+
+ uint64_t l;
+ ASSERT_THAT(read(efd, &l, sizeof(l)), SyscallFailsWithErrno(EAGAIN));
+
+ l = 1;
+ ASSERT_THAT(write(efd, &l, sizeof(l)), SyscallSucceeds());
+
+ l = 0;
+ ASSERT_THAT(read(efd, &l, sizeof(l)), SyscallSucceeds());
+ EXPECT_EQ(l, 1);
+
+ ASSERT_THAT(read(efd, &l, sizeof(l)), SyscallFailsWithErrno(EAGAIN));
+}
+
+void* read_three_times(void* arg) {
+ int efd = *reinterpret_cast<int*>(arg);
+ uint64_t l;
+ read(efd, &l, sizeof(l));
+ read(efd, &l, sizeof(l));
+ read(efd, &l, sizeof(l));
+ return nullptr;
+}
+
+TEST(EventfdTest, BlockingWrite) {
+ int efd;
+ ASSERT_THAT(efd = eventfd(0, EFD_SEMAPHORE), SyscallSucceeds());
+
+ pthread_t p;
+ ASSERT_THAT(pthread_create(&p, nullptr, read_three_times,
+ reinterpret_cast<void*>(&efd)),
+ SyscallSucceeds());
+
+ uint64_t l = 1;
+ ASSERT_THAT(write(efd, &l, sizeof(l)), SyscallSucceeds());
+ EXPECT_EQ(l, 1);
+
+ ASSERT_THAT(write(efd, &l, sizeof(l)), SyscallSucceeds());
+ EXPECT_EQ(l, 1);
+
+ ASSERT_THAT(write(efd, &l, sizeof(l)), SyscallSucceeds());
+ EXPECT_EQ(l, 1);
+
+ ASSERT_THAT(pthread_join(p, nullptr), SyscallSucceeds());
+}
+
+TEST(EventfdTest, SmallWrite) {
+ int efd;
+ ASSERT_THAT(efd = eventfd(0, EFD_NONBLOCK | EFD_SEMAPHORE),
+ SyscallSucceeds());
+
+ uint64_t l = 16;
+ ASSERT_THAT(write(efd, &l, 4), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(EventfdTest, SmallRead) {
+ int efd;
+ ASSERT_THAT(efd = eventfd(0, EFD_NONBLOCK | EFD_SEMAPHORE),
+ SyscallSucceeds());
+
+ uint64_t l = 1;
+ ASSERT_THAT(write(efd, &l, sizeof(l)), SyscallSucceeds());
+
+ l = 0;
+ ASSERT_THAT(read(efd, &l, 4), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(EventfdTest, BigWrite) {
+ int efd;
+ ASSERT_THAT(efd = eventfd(0, EFD_NONBLOCK | EFD_SEMAPHORE),
+ SyscallSucceeds());
+
+ uint64_t big[16];
+ big[0] = 16;
+ ASSERT_THAT(write(efd, big, sizeof(big)), SyscallSucceeds());
+}
+
+TEST(EventfdTest, BigRead) {
+ int efd;
+ ASSERT_THAT(efd = eventfd(0, EFD_NONBLOCK | EFD_SEMAPHORE),
+ SyscallSucceeds());
+
+ uint64_t l = 1;
+ ASSERT_THAT(write(efd, &l, sizeof(l)), SyscallSucceeds());
+
+ uint64_t big[16];
+ ASSERT_THAT(read(efd, big, sizeof(big)), SyscallSucceeds());
+ EXPECT_EQ(big[0], 1);
+}
+
+TEST(EventfdTest, BigWriteBigRead) {
+ int efd;
+ ASSERT_THAT(efd = eventfd(0, EFD_NONBLOCK | EFD_SEMAPHORE),
+ SyscallSucceeds());
+
+ uint64_t l[16];
+ l[0] = 16;
+ ASSERT_THAT(write(efd, l, sizeof(l)), SyscallSucceeds());
+ ASSERT_THAT(read(efd, l, sizeof(l)), SyscallSucceeds());
+ EXPECT_EQ(l[0], 1);
+}
+
+// NotifyNonZero is inherently racy, so random save is disabled.
+TEST(EventfdTest, NotifyNonZero_NoRandomSave) {
+ // Waits will time out at 10 seconds.
+ constexpr int kEpollTimeoutMs = 10000;
+ // Create an eventfd descriptor.
+ int efd;
+ ASSERT_THAT(efd = eventfd(7, EFD_SEMAPHORE | EFD_NONBLOCK),
+ SyscallSucceeds());
+ // Create an epoll fd to listen to efd.
+ int epollfd;
+ ASSERT_THAT(epollfd = epoll_create1(0), SyscallSucceeds());
+ // Add efd to epoll.
+ struct epoll_event add_ev;
+ add_ev.events = EPOLLIN | EPOLLET;
+ add_ev.data.fd = efd;
+ ASSERT_THAT(epoll_ctl(epollfd, EPOLL_CTL_ADD, efd, &add_ev),
+ SyscallSucceeds());
+
+ // Use epoll to get a value from efd.
+ struct epoll_event out_ev;
+ int wait_out = epoll_wait(epollfd, &out_ev, 1, kEpollTimeoutMs);
+ EXPECT_EQ(wait_out, 1);
+ EXPECT_EQ(efd, out_ev.data.fd);
+ uint64_t val = 0;
+ ASSERT_THAT(read(efd, &val, sizeof(val)), SyscallSucceeds());
+ EXPECT_EQ(val, 1);
+
+ // Start a thread that, after this thread blocks on epoll_wait, will write to
+ // efd. This is racy -- it's possible that this write will happen after
+ // epoll_wait times out.
+ ScopedThread t([efd] {
+ sleep(5);
+ uint64_t val = 1;
+ write(efd, &val, sizeof(val));
+ });
+
+ // epoll_wait should return once the thread writes.
+ wait_out = epoll_wait(epollfd, &out_ev, 1, kEpollTimeoutMs);
+ EXPECT_EQ(wait_out, 1);
+ EXPECT_EQ(efd, out_ev.data.fd);
+
+ val = 0;
+ ASSERT_THAT(read(efd, &val, sizeof(val)), SyscallSucceeds());
+ EXPECT_EQ(val, 1);
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/exceptions.cc b/test/syscalls/linux/exceptions.cc
new file mode 100644
index 000000000..72ab354e3
--- /dev/null
+++ b/test/syscalls/linux/exceptions.cc
@@ -0,0 +1,146 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <signal.h>
+
+#include "gtest/gtest.h"
+#include "test/util/logging.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+void inline Halt() { asm("hlt\r\n"); }
+
+void inline SetAlignmentCheck() {
+ asm("pushf\r\n"
+ "pop %%rax\r\n"
+ "or $0x40000, %%rax\r\n"
+ "push %%rax\r\n"
+ "popf\r\n"
+ :
+ :
+ : "ax");
+}
+
+void inline ClearAlignmentCheck() {
+ asm("pushf\r\n"
+ "pop %%rax\r\n"
+ "mov $0x40000, %%rbx\r\n"
+ "not %%rbx\r\n"
+ "and %%rbx, %%rax\r\n"
+ "push %%rax\r\n"
+ "popf\r\n"
+ :
+ :
+ : "ax", "bx");
+}
+
+void inline Int3Normal() { asm(".byte 0xcd, 0x03\r\n"); }
+
+void inline Int3Compact() { asm(".byte 0xcc\r\n"); }
+
+TEST(ExceptionTest, Halt) {
+ // In order to prevent the regular handler from messing with things (and
+ // perhaps refaulting until some other signal occurs), we reset the handler to
+ // the default action here and ensure that it dies correctly.
+ struct sigaction sa = {};
+ sa.sa_handler = SIG_DFL;
+ auto const cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGSEGV, sa));
+
+ EXPECT_EXIT(Halt(), ::testing::KilledBySignal(SIGSEGV), "");
+}
+
+TEST(ExceptionTest, DivideByZero) {
+ // See above.
+ struct sigaction sa = {};
+ sa.sa_handler = SIG_DFL;
+ auto const cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGFPE, sa));
+
+ EXPECT_EXIT(
+ {
+ uint32_t remainder;
+ uint32_t quotient;
+ uint32_t divisor = 0;
+ uint64_t value = 1;
+ asm("divl 0(%2)\r\n"
+ : "=d"(remainder), "=a"(quotient)
+ : "r"(&divisor), "d"(value >> 32), "a"(value));
+ TEST_CHECK(quotient > 0); // Force dependency.
+ },
+ ::testing::KilledBySignal(SIGFPE), "");
+}
+
+TEST(ExceptionTest, Alignment) {
+ SetAlignmentCheck();
+ ClearAlignmentCheck();
+}
+
+TEST(ExceptionTest, AlignmentHalt) {
+ // See above.
+ struct sigaction sa = {};
+ sa.sa_handler = SIG_DFL;
+ auto const cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGSEGV, sa));
+
+ // Reported upstream. We need to ensure that bad flags are cleared even in
+ // fault paths. Set the alignment flag and then generate an exception.
+ EXPECT_EXIT(
+ {
+ SetAlignmentCheck();
+ Halt();
+ },
+ ::testing::KilledBySignal(SIGSEGV), "");
+}
+
+TEST(ExceptionTest, AlignmentCheck) {
+
+ // See above.
+ struct sigaction sa = {};
+ sa.sa_handler = SIG_DFL;
+ auto const cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGBUS, sa));
+
+ EXPECT_EXIT(
+ {
+ char array[16];
+ SetAlignmentCheck();
+ for (int i = 0; i < 8; i++) {
+ // At least 7/8 offsets will be unaligned here.
+ uint64_t* ptr = reinterpret_cast<uint64_t*>(&array[i]);
+ asm("mov %0, 0(%0)\r\n" : : "r"(ptr) : "ax");
+ }
+ },
+ ::testing::KilledBySignal(SIGBUS), "");
+}
+
+TEST(ExceptionTest, Int3Normal) {
+ // See above.
+ struct sigaction sa = {};
+ sa.sa_handler = SIG_DFL;
+ auto const cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGTRAP, sa));
+
+ EXPECT_EXIT(Int3Normal(), ::testing::KilledBySignal(SIGTRAP), "");
+}
+
+TEST(ExceptionTest, Int3Compact) {
+ // See above.
+ struct sigaction sa = {};
+ sa.sa_handler = SIG_DFL;
+ auto const cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGTRAP, sa));
+
+ EXPECT_EXIT(Int3Compact(), ::testing::KilledBySignal(SIGTRAP), "");
+}
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/exec.cc b/test/syscalls/linux/exec.cc
new file mode 100644
index 000000000..1ef40b502
--- /dev/null
+++ b/test/syscalls/linux/exec.cc
@@ -0,0 +1,625 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/exec.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/eventfd.h>
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "absl/strings/match.h"
+#include "absl/strings/numbers.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_split.h"
+#include "absl/strings/string_view.h"
+#include "absl/synchronization/mutex.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/multiprocess_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+constexpr char kBasicWorkload[] = "exec_basic_workload";
+constexpr char kExitScript[] = "exit_script";
+constexpr char kStateWorkload[] = "exec_state_workload";
+constexpr char kProcExeWorkload[] = "exec_proc_exe_workload";
+constexpr char kAssertClosedWorkload[] = "exec_assert_closed_workload";
+constexpr char kPriorityWorkload[] = "priority_execve";
+
+std::string WorkloadPath(absl::string_view binary) {
+ std::string full_path;
+ char* test_src = getenv("TEST_SRCDIR");
+ if (test_src) {
+ full_path = JoinPath(test_src, "__main__/test/syscalls/linux", binary);
+ }
+ TEST_CHECK(full_path.empty() == false);
+ return full_path;
+}
+
+constexpr char kExit42[] = "--exec_exit_42";
+constexpr char kExecWithThread[] = "--exec_exec_with_thread";
+constexpr char kExecFromThread[] = "--exec_exec_from_thread";
+
+// Runs filename with argv and checks that the exit status is expect_status and
+// that stderr contains expect_stderr.
+void CheckOutput(const std::string& filename, const ExecveArray& argv,
+ const ExecveArray& envv, int expect_status,
+ const std::string& expect_stderr) {
+ int pipe_fds[2];
+ ASSERT_THAT(pipe2(pipe_fds, O_CLOEXEC), SyscallSucceeds());
+
+ FileDescriptor read_fd(pipe_fds[0]);
+ FileDescriptor write_fd(pipe_fds[1]);
+
+ pid_t child;
+ int execve_errno;
+
+ const auto remap_stderr = [pipe_fds] {
+ // Remap stdin and stdout to /dev/null.
+ int fd = open("/dev/null", O_RDWR | O_CLOEXEC);
+ if (fd < 0) {
+ _exit(errno);
+ }
+
+ int ret = dup2(fd, 0);
+ if (ret < 0) {
+ _exit(errno);
+ }
+
+ ret = dup2(fd, 1);
+ if (ret < 0) {
+ _exit(errno);
+ }
+
+ // And stderr to the pipe.
+ ret = dup2(pipe_fds[1], 2);
+ if (ret < 0) {
+ _exit(errno);
+ }
+
+ // Here, we'd ideally close all other FDs inherited from the parent.
+ // However, that's not worth the effort and CloexecNormalFile and
+ // CloexecEventfd depend on that not happening.
+ };
+
+ auto kill = ASSERT_NO_ERRNO_AND_VALUE(
+ ForkAndExec(filename, argv, envv, remap_stderr, &child, &execve_errno));
+
+ ASSERT_EQ(0, execve_errno);
+
+ // Not needed anymore.
+ write_fd.reset();
+
+ // Read stderr until the child exits.
+ std::string output;
+ constexpr int kSize = 128;
+ char buf[kSize];
+ int n;
+ do {
+ ASSERT_THAT(n = ReadFd(read_fd.get(), buf, kSize), SyscallSucceeds());
+ if (n > 0) {
+ output.append(buf, n);
+ }
+ } while (n > 0);
+
+ int status;
+ ASSERT_THAT(RetryEINTR(waitpid)(child, &status, 0), SyscallSucceeds());
+ EXPECT_EQ(status, expect_status);
+
+ // Process cleanup no longer needed.
+ kill.Release();
+
+ EXPECT_TRUE(absl::StrContains(output, expect_stderr)) << output;
+}
+
+TEST(ExecDeathTest, EmptyPath) {
+ int execve_errno;
+ ASSERT_NO_ERRNO_AND_VALUE(ForkAndExec("", {}, {}, nullptr, &execve_errno));
+ EXPECT_EQ(execve_errno, ENOENT);
+}
+
+TEST(ExecDeathTest, Basic) {
+ CheckOutput(WorkloadPath(kBasicWorkload), {WorkloadPath(kBasicWorkload)}, {},
+ ArgEnvExitStatus(0, 0),
+ absl::StrCat(WorkloadPath(kBasicWorkload), "\n"));
+}
+
+TEST(ExecDeathTest, OneArg) {
+ CheckOutput(WorkloadPath(kBasicWorkload), {WorkloadPath(kBasicWorkload), "1"},
+ {}, ArgEnvExitStatus(1, 0),
+ absl::StrCat(WorkloadPath(kBasicWorkload), "\n1\n"));
+}
+
+TEST(ExecDeathTest, FiveArg) {
+ CheckOutput(WorkloadPath(kBasicWorkload),
+ {WorkloadPath(kBasicWorkload), "1", "2", "3", "4", "5"}, {},
+ ArgEnvExitStatus(5, 0),
+ absl::StrCat(WorkloadPath(kBasicWorkload), "\n1\n2\n3\n4\n5\n"));
+}
+
+TEST(ExecDeathTest, OneEnv) {
+ CheckOutput(WorkloadPath(kBasicWorkload), {WorkloadPath(kBasicWorkload)},
+ {"1"}, ArgEnvExitStatus(0, 1),
+ absl::StrCat(WorkloadPath(kBasicWorkload), "\n1\n"));
+}
+
+TEST(ExecDeathTest, FiveEnv) {
+ CheckOutput(WorkloadPath(kBasicWorkload), {WorkloadPath(kBasicWorkload)},
+ {"1", "2", "3", "4", "5"}, ArgEnvExitStatus(0, 5),
+ absl::StrCat(WorkloadPath(kBasicWorkload), "\n1\n2\n3\n4\n5\n"));
+}
+
+TEST(ExecDeathTest, OneArgOneEnv) {
+ CheckOutput(WorkloadPath(kBasicWorkload),
+ {WorkloadPath(kBasicWorkload), "arg"}, {"env"},
+ ArgEnvExitStatus(1, 1),
+ absl::StrCat(WorkloadPath(kBasicWorkload), "\narg\nenv\n"));
+}
+
+TEST(ExecDeathTest, InterpreterScript) {
+ CheckOutput(WorkloadPath(kExitScript), {WorkloadPath(kExitScript), "25"}, {},
+ ArgEnvExitStatus(25, 0), "");
+}
+
+// Everything after the path in the interpreter script is a single argument.
+TEST(ExecDeathTest, InterpreterScriptArgSplit) {
+ // Symlink through /tmp to ensure the path is short enough.
+ TempPath link = ASSERT_NO_ERRNO_AND_VALUE(
+ TempPath::CreateSymlinkTo("/tmp", WorkloadPath(kBasicWorkload)));
+
+ TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), absl::StrCat("#!", link.path(), " foo bar"),
+ 0755));
+
+ CheckOutput(script.path(), {script.path()}, {}, ArgEnvExitStatus(2, 0),
+ absl::StrCat(link.path(), "\nfoo bar\n", script.path(), "\n"));
+}
+
+// Original argv[0] is replaced with the script path.
+TEST(ExecDeathTest, InterpreterScriptArgvZero) {
+ // Symlink through /tmp to ensure the path is short enough.
+ TempPath link = ASSERT_NO_ERRNO_AND_VALUE(
+ TempPath::CreateSymlinkTo("/tmp", WorkloadPath(kBasicWorkload)));
+
+ TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), absl::StrCat("#!", link.path()), 0755));
+
+ CheckOutput(script.path(), {"REPLACED"}, {}, ArgEnvExitStatus(1, 0),
+ absl::StrCat(link.path(), "\n", script.path(), "\n"));
+}
+
+// Original argv[0] is replaced with the script path, exactly as passed to
+// execve.
+TEST(ExecDeathTest, InterpreterScriptArgvZeroRelative) {
+ // Symlink through /tmp to ensure the path is short enough.
+ TempPath link = ASSERT_NO_ERRNO_AND_VALUE(
+ TempPath::CreateSymlinkTo("/tmp", WorkloadPath(kBasicWorkload)));
+
+ TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), absl::StrCat("#!", link.path()), 0755));
+
+ auto cwd = ASSERT_NO_ERRNO_AND_VALUE(GetCWD());
+ auto script_relative =
+ ASSERT_NO_ERRNO_AND_VALUE(GetRelativePath(cwd, script.path()));
+
+ CheckOutput(script_relative, {"REPLACED"}, {}, ArgEnvExitStatus(1, 0),
+ absl::StrCat(link.path(), "\n", script_relative, "\n"));
+}
+
+// argv[0] is added as the script path, even if there was none.
+TEST(ExecDeathTest, InterpreterScriptArgvZeroAdded) {
+ // Symlink through /tmp to ensure the path is short enough.
+ TempPath link = ASSERT_NO_ERRNO_AND_VALUE(
+ TempPath::CreateSymlinkTo("/tmp", WorkloadPath(kBasicWorkload)));
+
+ TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), absl::StrCat("#!", link.path()), 0755));
+
+ CheckOutput(script.path(), {}, {}, ArgEnvExitStatus(1, 0),
+ absl::StrCat(link.path(), "\n", script.path(), "\n"));
+}
+
+// A NUL byte in the script line ends parsing.
+TEST(ExecDeathTest, InterpreterScriptArgNUL) {
+ // Symlink through /tmp to ensure the path is short enough.
+ TempPath link = ASSERT_NO_ERRNO_AND_VALUE(
+ TempPath::CreateSymlinkTo("/tmp", WorkloadPath(kBasicWorkload)));
+
+ TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(),
+ absl::StrCat("#!", link.path(), " foo", std::string(1, '\0'), "bar"), 0755));
+
+ CheckOutput(script.path(), {script.path()}, {}, ArgEnvExitStatus(2, 0),
+ absl::StrCat(link.path(), "\nfoo\n", script.path(), "\n"));
+}
+
+// Trailing whitespace following interpreter path is ignored.
+TEST(ExecDeathTest, InterpreterScriptTrailingWhitespace) {
+ // Symlink through /tmp to ensure the path is short enough.
+ TempPath link = ASSERT_NO_ERRNO_AND_VALUE(
+ TempPath::CreateSymlinkTo("/tmp", WorkloadPath(kBasicWorkload)));
+
+ TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), absl::StrCat("#!", link.path(), " "), 0755));
+
+ CheckOutput(script.path(), {script.path()}, {}, ArgEnvExitStatus(1, 0),
+ absl::StrCat(link.path(), "\n", script.path(), "\n"));
+}
+
+// Multiple whitespace characters between interpreter and arg allowed.
+TEST(ExecDeathTest, InterpreterScriptArgWhitespace) {
+ // Symlink through /tmp to ensure the path is short enough.
+ TempPath link = ASSERT_NO_ERRNO_AND_VALUE(
+ TempPath::CreateSymlinkTo("/tmp", WorkloadPath(kBasicWorkload)));
+
+ TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), absl::StrCat("#!", link.path(), " foo"), 0755));
+
+ CheckOutput(script.path(), {script.path()}, {}, ArgEnvExitStatus(2, 0),
+ absl::StrCat(link.path(), "\nfoo\n", script.path(), "\n"));
+}
+
+TEST(ExecDeathTest, InterpreterScriptNoPath) {
+ TempPath script = ASSERT_NO_ERRNO_AND_VALUE(
+ TempPath::CreateFileWith(GetAbsoluteTestTmpdir(), "#!", 0755));
+
+ int execve_errno;
+ ASSERT_NO_ERRNO_AND_VALUE(
+ ForkAndExec(script.path(), {script.path()}, {}, nullptr, &execve_errno));
+ EXPECT_EQ(execve_errno, ENOEXEC);
+}
+
+// AT_EXECFN is the path passed to execve.
+TEST(ExecDeathTest, ExecFn) {
+ // Symlink through /tmp to ensure the path is short enough.
+ TempPath link = ASSERT_NO_ERRNO_AND_VALUE(
+ TempPath::CreateSymlinkTo("/tmp", WorkloadPath(kStateWorkload)));
+
+ TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), absl::StrCat("#!", link.path(), " PrintExecFn"),
+ 0755));
+
+ // Pass the script as a relative path and assert that is what appears in
+ // AT_EXECFN.
+ auto cwd = ASSERT_NO_ERRNO_AND_VALUE(GetCWD());
+ auto script_relative =
+ ASSERT_NO_ERRNO_AND_VALUE(GetRelativePath(cwd, script.path()));
+
+ CheckOutput(script_relative, {script_relative}, {}, ArgEnvExitStatus(0, 0),
+ absl::StrCat(script_relative, "\n"));
+}
+
+TEST(ExecDeathTest, ExecName) {
+ std::string path = WorkloadPath(kStateWorkload);
+
+ CheckOutput(path, {path, "PrintExecName"}, {}, ArgEnvExitStatus(0, 0),
+ absl::StrCat(Basename(path).substr(0, 15), "\n"));
+}
+
+TEST(ExecDeathTest, ExecNameScript) {
+ // Symlink through /tmp to ensure the path is short enough.
+ TempPath link = ASSERT_NO_ERRNO_AND_VALUE(
+ TempPath::CreateSymlinkTo("/tmp", WorkloadPath(kStateWorkload)));
+
+ TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(),
+ absl::StrCat("#!", link.path(), " PrintExecName"), 0755));
+
+ std::string script_path = script.path();
+
+ CheckOutput(script_path, {script_path}, {}, ArgEnvExitStatus(0, 0),
+ absl::StrCat(Basename(script_path).substr(0, 15), "\n"));
+}
+
+// execve may be called by a multithreaded process.
+TEST(ExecDeathTest, WithSiblingThread) {
+ CheckOutput("/proc/self/exe", {"/proc/self/exe", kExecWithThread}, {},
+ W_EXITCODE(42, 0), "");
+}
+
+// execve may be called from a thread other than the leader of a multithreaded
+// process.
+TEST(ExecDeathTest, FromSiblingThread) {
+ CheckOutput("/proc/self/exe", {"/proc/self/exe", kExecFromThread}, {},
+ W_EXITCODE(42, 0), "");
+}
+
+TEST(ExecTest, NotFound) {
+ char* const argv[] = {nullptr};
+ char* const envp[] = {nullptr};
+ EXPECT_THAT(execve("/file/does/not/exist", argv, envp),
+ SyscallFailsWithErrno(ENOENT));
+}
+
+TEST(ExecTest, NoExecPerm) {
+ char* const argv[] = {nullptr};
+ char* const envp[] = {nullptr};
+ auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ EXPECT_THAT(execve(f.path().c_str(), argv, envp),
+ SyscallFailsWithErrno(EACCES));
+}
+
+// A signal handler we never expect to be called.
+void SignalHandler(int signo) {
+ std::cerr << "Signal " << signo << " raised." << std::endl;
+ exit(1);
+}
+
+// Signal handlers are reset on execve(2), unless they have default or ignored
+// disposition.
+TEST(ExecStateDeathTest, HandlerReset) {
+ struct sigaction sa;
+ sa.sa_handler = SignalHandler;
+ ASSERT_THAT(sigaction(SIGUSR1, &sa, nullptr), SyscallSucceeds());
+
+ ExecveArray args = {
+ WorkloadPath(kStateWorkload),
+ "CheckSigHandler",
+ absl::StrCat(SIGUSR1),
+ absl::StrCat(absl::Hex(reinterpret_cast<uintptr_t>(SIG_DFL))),
+ };
+
+ CheckOutput(WorkloadPath(kStateWorkload), args, {}, W_EXITCODE(0, 0), "");
+}
+
+// Ignored signal dispositions are not reset.
+TEST(ExecStateDeathTest, IgnorePreserved) {
+ struct sigaction sa;
+ sa.sa_handler = SIG_IGN;
+ ASSERT_THAT(sigaction(SIGUSR1, &sa, nullptr), SyscallSucceeds());
+
+ ExecveArray args = {
+ WorkloadPath(kStateWorkload),
+ "CheckSigHandler",
+ absl::StrCat(SIGUSR1),
+ absl::StrCat(absl::Hex(reinterpret_cast<uintptr_t>(SIG_IGN))),
+ };
+
+ CheckOutput(WorkloadPath(kStateWorkload), args, {}, W_EXITCODE(0, 0), "");
+}
+
+// Signal masks are not reset on exec
+TEST(ExecStateDeathTest, SignalMask) {
+ sigset_t s;
+ sigemptyset(&s);
+ sigaddset(&s, SIGUSR1);
+ ASSERT_THAT(sigprocmask(SIG_BLOCK, &s, nullptr), SyscallSucceeds());
+
+ ExecveArray args = {
+ WorkloadPath(kStateWorkload),
+ "CheckSigBlocked",
+ absl::StrCat(SIGUSR1),
+ };
+
+ CheckOutput(WorkloadPath(kStateWorkload), args, {}, W_EXITCODE(0, 0), "");
+}
+
+// itimers persist across execve.
+// N.B. Timers created with timer_create(2) should not be preserved!
+TEST(ExecStateDeathTest, ItimerPreserved) {
+ // The fork in ForkAndExec clears itimers, so only set them up after fork.
+ auto setup_itimer = [] {
+ // Ignore SIGALRM, as we don't actually care about timer
+ // expirations.
+ struct sigaction sa;
+ sa.sa_handler = SIG_IGN;
+ int ret = sigaction(SIGALRM, &sa, nullptr);
+ if (ret < 0) {
+ _exit(errno);
+ }
+
+ struct itimerval itv;
+ itv.it_interval.tv_sec = 1;
+ itv.it_interval.tv_usec = 0;
+ itv.it_value.tv_sec = 1;
+ itv.it_value.tv_usec = 0;
+ ret = setitimer(ITIMER_REAL, &itv, nullptr);
+ if (ret < 0) {
+ _exit(errno);
+ }
+ };
+
+ std::string filename = WorkloadPath(kStateWorkload);
+ ExecveArray argv = {
+ filename,
+ "CheckItimerEnabled",
+ absl::StrCat(ITIMER_REAL),
+ };
+
+ pid_t child;
+ int execve_errno;
+ auto kill = ASSERT_NO_ERRNO_AND_VALUE(
+ ForkAndExec(filename, argv, {}, setup_itimer, &child, &execve_errno));
+ ASSERT_EQ(0, execve_errno);
+
+ int status;
+ ASSERT_THAT(RetryEINTR(waitpid)(child, &status, 0), SyscallSucceeds());
+ EXPECT_EQ(0, status);
+
+ // Process cleanup no longer needed.
+ kill.Release();
+}
+
+TEST(ProcSelfExe, ChangesAcrossExecve) {
+ // See exec_proc_exe_workload for more details. We simply
+ // assert that the /proc/self/exe link changes across execve.
+ CheckOutput(WorkloadPath(kProcExeWorkload),
+ {WorkloadPath(kProcExeWorkload),
+ ASSERT_NO_ERRNO_AND_VALUE(ProcessExePath(getpid()))},
+ {}, W_EXITCODE(0, 0), "");
+}
+
+TEST(ExecTest, CloexecNormalFile) {
+ const FileDescriptor fd_closed_on_exec = ASSERT_NO_ERRNO_AND_VALUE(
+ Open("/usr/share/zoneinfo", O_RDONLY | O_CLOEXEC));
+
+ CheckOutput(WorkloadPath(kAssertClosedWorkload),
+ {WorkloadPath(kAssertClosedWorkload),
+ absl::StrCat(fd_closed_on_exec.get())},
+ {}, W_EXITCODE(0, 0), "");
+
+ // The assert closed workload exits with code 2 if the file still exists. We
+ // can use this to do a negative test.
+ const FileDescriptor fd_open_on_exec =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/usr/share/zoneinfo", O_RDONLY));
+
+ CheckOutput(WorkloadPath(kAssertClosedWorkload),
+ {WorkloadPath(kAssertClosedWorkload),
+ absl::StrCat(fd_open_on_exec.get())},
+ {}, W_EXITCODE(2, 0), "");
+}
+
+TEST(ExecTest, CloexecEventfd) {
+ int efd;
+ ASSERT_THAT(efd = eventfd(0, EFD_CLOEXEC), SyscallSucceeds());
+ FileDescriptor fd(efd);
+
+ CheckOutput(WorkloadPath(kAssertClosedWorkload),
+ {WorkloadPath(kAssertClosedWorkload), absl::StrCat(fd.get())}, {},
+ W_EXITCODE(0, 0), "");
+}
+
+// Priority consistent across calls to execve()
+TEST(GetpriorityTest, ExecveMaintainsPriority) {
+ int prio = 16;
+ ASSERT_THAT(setpriority(PRIO_PROCESS, getpid(), prio), SyscallSucceeds());
+
+ // To avoid trying to use negative exit values, check for
+ // 20 - prio. Since prio should always be in the range [-20, 19],
+ // this leave expected_exit_code in the range [1, 40].
+ int expected_exit_code = 20 - prio;
+
+ // Program run (priority_execve) will exit(X) where
+ // X=getpriority(PRIO_PROCESS,0). Check that this exit value is prio.
+ CheckOutput(WorkloadPath(kPriorityWorkload),
+ {WorkloadPath(kPriorityWorkload)}, {},
+ W_EXITCODE(expected_exit_code, 0), "");
+}
+
+void ExecWithThread() {
+ // Used to ensure that the thread has actually started.
+ absl::Mutex mu;
+ bool started = false;
+
+ ScopedThread t([&] {
+ mu.Lock();
+ started = true;
+ mu.Unlock();
+
+ while (true) {
+ pause();
+ }
+ });
+
+ mu.LockWhen(absl::Condition(&started));
+ mu.Unlock();
+
+ const ExecveArray argv = {"/proc/self/exe", kExit42};
+ const ExecveArray envv;
+
+ execve("/proc/self/exe", argv.get(), envv.get());
+ exit(errno);
+}
+
+void ExecFromThread() {
+ ScopedThread t([] {
+ const ExecveArray argv = {"/proc/self/exe", kExit42};
+ const ExecveArray envv;
+
+ execve("/proc/self/exe", argv.get(), envv.get());
+ exit(errno);
+ });
+
+ while (true) {
+ pause();
+ }
+}
+
+bool ValidateProcCmdlineVsArgv(const int argc, const char* const* argv) {
+ auto contents_or = GetContents("/proc/self/cmdline");
+ if (!contents_or.ok()) {
+ LOG(ERROR) << "Unable to get /proc/self/cmdline: " << contents_or.error();
+ return false;
+ }
+ auto contents = contents_or.ValueOrDie();
+ if (contents.back() != '\0') {
+ LOG(ERROR) << "Non-null terminated /proc/self/cmdline!";
+ return false;
+ }
+ contents.pop_back();
+ std::vector<std::string> procfs_cmdline = absl::StrSplit(contents, '\0');
+
+ if (static_cast<int>(procfs_cmdline.size()) != argc) {
+ LOG(ERROR) << "argc = " << argc << " != " << procfs_cmdline.size();
+ return false;
+ }
+
+ for (int i = 0; i < argc; ++i) {
+ if (procfs_cmdline[i] != argv[i]) {
+ LOG(ERROR) << "Procfs command line argument " << i << " mismatch "
+ << procfs_cmdline[i] << " != " << argv[i];
+ return false;
+ }
+ }
+ return true;
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
+
+int main(int argc, char** argv) {
+ // Start by validating that the stack argv is consistent with procfs.
+ if (!gvisor::testing::ValidateProcCmdlineVsArgv(argc, argv)) {
+ return 1;
+ }
+
+ // Some of these tests require no background threads, so check for them before
+ // TestInit.
+ for (int i = 0; i < argc; i++) {
+ absl::string_view arg(argv[i]);
+
+ if (arg == gvisor::testing::kExit42) {
+ return 42;
+ }
+ if (arg == gvisor::testing::kExecWithThread) {
+ gvisor::testing::ExecWithThread();
+ return 1;
+ }
+ if (arg == gvisor::testing::kExecFromThread) {
+ gvisor::testing::ExecFromThread();
+ return 1;
+ }
+ }
+
+ gvisor::testing::TestInit(&argc, &argv);
+
+ return RUN_ALL_TESTS();
+}
diff --git a/test/syscalls/linux/exec.h b/test/syscalls/linux/exec.h
new file mode 100644
index 000000000..b82bfffd1
--- /dev/null
+++ b/test/syscalls/linux/exec.h
@@ -0,0 +1,34 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_EXEC_H_
+#define GVISOR_TEST_SYSCALLS_EXEC_H_
+
+#include <sys/wait.h>
+
+namespace gvisor {
+namespace testing {
+
+// Returns the exit code used by exec_basic_workload.
+inline int ArgEnvExitCode(int args, int envs) { return args + envs * 10; }
+
+// Returns the exit status used by exec_basic_workload.
+inline int ArgEnvExitStatus(int args, int envs) {
+ return W_EXITCODE(ArgEnvExitCode(args, envs), 0);
+}
+
+} // namespace testing
+} // namespace gvisor
+
+#endif // GVISOR_TEST_SYSCALLS_EXEC_H_
diff --git a/test/syscalls/linux/exec_assert_closed_workload.cc b/test/syscalls/linux/exec_assert_closed_workload.cc
new file mode 100644
index 000000000..4448431e1
--- /dev/null
+++ b/test/syscalls/linux/exec_assert_closed_workload.cc
@@ -0,0 +1,45 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <iostream>
+
+#include "absl/strings/numbers.h"
+
+int main(int argc, char** argv) {
+ if (argc != 2) {
+ std::cerr << "need two arguments, got " << argc;
+ exit(1);
+ }
+ int fd;
+ if (!absl::SimpleAtoi(argv[1], &fd)) {
+ std::cerr << "fd: " << argv[1] << " could not be parsed" << std::endl;
+ exit(1);
+ }
+ struct stat s;
+ if (fstat(fd, &s) == 0) {
+ std::cerr << "fd: " << argv[1] << " should not be valid" << std::endl;
+ exit(2);
+ }
+ if (errno != EBADF) {
+ std::cerr << "fstat fd: " << argv[1] << " got errno: " << errno
+ << " wanted: " << EBADF << std::endl;
+ exit(1);
+ }
+ return 0;
+}
diff --git a/test/syscalls/linux/exec_basic_workload.cc b/test/syscalls/linux/exec_basic_workload.cc
new file mode 100644
index 000000000..d4bdf511f
--- /dev/null
+++ b/test/syscalls/linux/exec_basic_workload.cc
@@ -0,0 +1,31 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdlib.h>
+
+#include <iostream>
+
+#include "test/syscalls/linux/exec.h"
+
+int main(int argc, char** argv, char** envp) {
+ int i;
+ for (i = 0; i < argc; i++) {
+ std::cerr << argv[i] << std::endl;
+ }
+ for (i = 0; envp[i] != nullptr; i++) {
+ std::cerr << envp[i] << std::endl;
+ }
+ exit(gvisor::testing::ArgEnvExitCode(argc - 1, i));
+ return 0;
+}
diff --git a/test/syscalls/linux/exec_binary.cc b/test/syscalls/linux/exec_binary.cc
new file mode 100644
index 000000000..cfc898699
--- /dev/null
+++ b/test/syscalls/linux/exec_binary.cc
@@ -0,0 +1,1367 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <elf.h>
+#include <errno.h>
+#include <signal.h>
+#include <sys/ptrace.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/user.h>
+#include <unistd.h>
+#include <algorithm>
+#include <functional>
+#include <iterator>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/string_view.h"
+#include "test/util/cleanup.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/multiprocess_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/proc_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+using ::testing::AnyOf;
+using ::testing::Eq;
+
+#ifndef __x86_64__
+// The assembly stub and ELF internal details must be ported to other arches.
+#error "Test only supported on x86-64"
+#endif // __x86_64__
+
+// amd64 stub that calls PTRACE_TRACEME and sends itself SIGSTOP.
+const char kPtraceCode[] = {
+ // movq $101, %rax /* ptrace */
+ '\x48',
+ '\xc7',
+ '\xc0',
+ '\x65',
+ '\x00',
+ '\x00',
+ '\x00',
+ // movq $0, %rsi /* PTRACE_TRACEME */
+ '\x48',
+ '\xc7',
+ '\xc6',
+ '\x00',
+ '\x00',
+ '\x00',
+ '\x00',
+ // movq $0, %rdi
+ '\x48',
+ '\xc7',
+ '\xc7',
+ '\x00',
+ '\x00',
+ '\x00',
+ '\x00',
+ // movq $0, %rdx
+ '\x48',
+ '\xc7',
+ '\xc2',
+ '\x00',
+ '\x00',
+ '\x00',
+ '\x00',
+ // movq $0, %r10
+ '\x49',
+ '\xc7',
+ '\xc2',
+ '\x00',
+ '\x00',
+ '\x00',
+ '\x00',
+ // syscall
+ '\x0f',
+ '\x05',
+
+ // movq $39, %rax /* getpid */
+ '\x48',
+ '\xc7',
+ '\xc0',
+ '\x27',
+ '\x00',
+ '\x00',
+ '\x00',
+ // syscall
+ '\x0f',
+ '\x05',
+
+ // movq %rax, %rdi /* pid */
+ '\x48',
+ '\x89',
+ '\xc7',
+ // movq $62, %rax /* kill */
+ '\x48',
+ '\xc7',
+ '\xc0',
+ '\x3e',
+ '\x00',
+ '\x00',
+ '\x00',
+ // movq $19, %rsi /* SIGSTOP */
+ '\x48',
+ '\xc7',
+ '\xc6',
+ '\x13',
+ '\x00',
+ '\x00',
+ '\x00',
+ // syscall
+ '\x0f',
+ '\x05',
+};
+
+// Size of a syscall instruction.
+constexpr int kSyscallSize = 2;
+
+// This test suite tests executable loading in the kernel (ELF and interpreter
+// scripts).
+
+// Parameterized ELF types for 64 and 32 bit.
+template <int Size>
+struct ElfTypes;
+
+template <>
+struct ElfTypes<64> {
+ typedef Elf64_Ehdr ElfEhdr;
+ typedef Elf64_Phdr ElfPhdr;
+};
+
+template <>
+struct ElfTypes<32> {
+ typedef Elf32_Ehdr ElfEhdr;
+ typedef Elf32_Phdr ElfPhdr;
+};
+
+template <int Size>
+struct ElfBinary {
+ using ElfEhdr = typename ElfTypes<Size>::ElfEhdr;
+ using ElfPhdr = typename ElfTypes<Size>::ElfPhdr;
+
+ ElfEhdr header = {};
+ std::vector<ElfPhdr> phdrs;
+ std::vector<char> data;
+
+ // UpdateOffsets updates p_offset, p_vaddr in all phdrs to account for the
+ // space taken by the header and phdrs.
+ //
+ // It also updates header.e_phnum and adds the offset to header.e_entry to
+ // account for the headers residing in the first PT_LOAD segment.
+ //
+ // Before calling UpdateOffsets each of those fields should be the appropriate
+ // offset into data.
+ void UpdateOffsets() {
+ size_t offset = sizeof(header) + phdrs.size() * sizeof(ElfPhdr);
+ header.e_entry += offset;
+ header.e_phnum = phdrs.size();
+ for (auto& p : phdrs) {
+ p.p_offset += offset;
+ p.p_vaddr += offset;
+ }
+ }
+
+ // AddInterpreter adds a PT_INTERP segment with the passed contents.
+ //
+ // A later call to UpdateOffsets is required to make the new phdr valid.
+ void AddInterpreter(std::vector<char> contents) {
+ const int start = data.size();
+ data.insert(data.end(), contents.begin(), contents.end());
+ const int size = data.size() - start;
+
+ ElfPhdr phdr = {};
+ phdr.p_type = PT_INTERP;
+ phdr.p_offset = start;
+ phdr.p_filesz = size;
+ phdr.p_memsz = size;
+ // "If [PT_INTERP] is present, it must precede any loadable segment entry."
+ phdrs.insert(phdrs.begin(), phdr);
+ }
+
+ // Writes the header, phdrs, and data to fd.
+ PosixError Write(int fd) const {
+ int ret = WriteFd(fd, &header, sizeof(header));
+ if (ret < 0) {
+ return PosixError(errno, "failed to write header");
+ } else if (ret != sizeof(header)) {
+ return PosixError(EIO, absl::StrCat("short write of header: ", ret));
+ }
+
+ for (auto const& p : phdrs) {
+ ret = WriteFd(fd, &p, sizeof(p));
+ if (ret < 0) {
+ return PosixError(errno, "failed to write phdr");
+ } else if (ret != sizeof(p)) {
+ return PosixError(EIO, absl::StrCat("short write of phdr: ", ret));
+ }
+ }
+
+ ret = WriteFd(fd, data.data(), data.size());
+ if (ret < 0) {
+ return PosixError(errno, "failed to write data");
+ } else if (ret != static_cast<int>(data.size())) {
+ return PosixError(EIO, absl::StrCat("short write of data: ", ret));
+ }
+
+ return NoError();
+ }
+};
+
+// Creates a new temporary executable ELF file in parent with elf as the
+// contents.
+template <int Size>
+PosixErrorOr<TempPath> CreateElfWith(absl::string_view parent,
+ ElfBinary<Size> const& elf) {
+ ASSIGN_OR_RETURN_ERRNO(
+ auto file, TempPath::CreateFileWith(parent, absl::string_view(), 0755));
+ ASSIGN_OR_RETURN_ERRNO(auto fd, Open(file.path(), O_RDWR));
+ RETURN_IF_ERRNO(elf.Write(fd.get()));
+ return std::move(file);
+}
+
+// Creates a new temporary executable ELF file with elf as the contents.
+template <int Size>
+PosixErrorOr<TempPath> CreateElfWith(ElfBinary<Size> const& elf) {
+ return CreateElfWith(GetAbsoluteTestTmpdir(), elf);
+}
+
+// Wait for pid to stop, and assert that it stopped via SIGSTOP.
+PosixError WaitStopped(pid_t pid) {
+ int status;
+ int ret = RetryEINTR(waitpid)(pid, &status, 0);
+ MaybeSave();
+ if (ret < 0) {
+ return PosixError(errno, "wait failed");
+ } else if (ret != pid) {
+ return PosixError(ESRCH, absl::StrCat("wait got ", ret, " want ", pid));
+ }
+
+ if (!WIFSTOPPED(status) || WSTOPSIG(status) != SIGSTOP) {
+ return PosixError(EINVAL,
+ absl::StrCat("pid did not SIGSTOP; status = ", status));
+ }
+
+ return NoError();
+}
+
+// Returns a valid ELF that PTRACE_TRACEME and SIGSTOPs itself.
+//
+// UpdateOffsets must be called before writing this ELF.
+ElfBinary<64> StandardElf() {
+ ElfBinary<64> elf;
+ elf.header.e_ident[EI_MAG0] = ELFMAG0;
+ elf.header.e_ident[EI_MAG1] = ELFMAG1;
+ elf.header.e_ident[EI_MAG2] = ELFMAG2;
+ elf.header.e_ident[EI_MAG3] = ELFMAG3;
+ elf.header.e_ident[EI_CLASS] = ELFCLASS64;
+ elf.header.e_ident[EI_DATA] = ELFDATA2LSB;
+ elf.header.e_ident[EI_VERSION] = EV_CURRENT;
+ elf.header.e_type = ET_EXEC;
+ elf.header.e_machine = EM_X86_64;
+ elf.header.e_version = EV_CURRENT;
+ elf.header.e_phoff = sizeof(elf.header);
+ elf.header.e_phentsize = sizeof(decltype(elf)::ElfPhdr);
+
+ // TODO: Always include a PT_GNU_STACK segment to disable
+ // executable stacks. With this omitted the stack (and all PROT_READ) mappings
+ // should be executable, but gVisor doesn't support that.
+ decltype(elf)::ElfPhdr phdr = {};
+ phdr.p_type = PT_GNU_STACK;
+ phdr.p_flags = PF_R | PF_W;
+ elf.phdrs.push_back(phdr);
+
+ phdr = {};
+ phdr.p_type = PT_LOAD;
+ phdr.p_flags = PF_R | PF_X;
+ phdr.p_offset = 0;
+ phdr.p_vaddr = 0x40000;
+ phdr.p_filesz = sizeof(kPtraceCode);
+ phdr.p_memsz = phdr.p_filesz;
+ elf.phdrs.push_back(phdr);
+
+ elf.header.e_entry = phdr.p_vaddr;
+
+ elf.data.assign(kPtraceCode, kPtraceCode + sizeof(kPtraceCode));
+
+ return elf;
+}
+
+// Test that a trivial binary executes.
+TEST(ElfTest, Execute) {
+ ElfBinary<64> elf = StandardElf();
+ elf.UpdateOffsets();
+
+ TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf));
+
+ pid_t child;
+ int execve_errno;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+ ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno));
+ ASSERT_EQ(execve_errno, 0);
+
+ // Ensure it made it to SIGSTOP.
+ ASSERT_NO_ERRNO(WaitStopped(child));
+
+ struct user_regs_struct regs;
+ ASSERT_THAT(ptrace(PTRACE_GETREGS, child, 0, &regs), SyscallSucceeds());
+ // RIP is just beyond the final syscall instruction.
+ EXPECT_EQ(regs.rip, elf.header.e_entry + sizeof(kPtraceCode));
+
+ EXPECT_THAT(child, ContainsMappings(std::vector<ProcMapsEntry>({
+ {0x40000, 0x41000, true, false, true, true, 0, 0, 0, 0,
+ file.path().c_str()},
+ })));
+}
+
+// StandardElf without data completes execve, but faults once running.
+TEST(ElfTest, MissingText) {
+ ElfBinary<64> elf = StandardElf();
+ elf.data.clear();
+ elf.UpdateOffsets();
+
+ TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf));
+
+ pid_t child;
+ int execve_errno;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+ ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno));
+ ASSERT_EQ(execve_errno, 0);
+
+ int status;
+ ASSERT_THAT(RetryEINTR(waitpid)(child, &status, 0),
+ SyscallSucceedsWithValue(child));
+ // It runs off the end of the zeroes filling the end of the page.
+ EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGSEGV) << status;
+}
+
+// Typical ELF with a data + bss segment
+TEST(ElfTest, DataSegment) {
+ ElfBinary<64> elf = StandardElf();
+
+ // Create a standard ELF, but extend to 1.5 pages. The second page will be the
+ // beginning of a multi-page data + bss segment.
+ elf.data.resize(kPageSize + kPageSize / 2);
+
+ decltype(elf)::ElfPhdr phdr = {};
+ phdr.p_type = PT_LOAD;
+ phdr.p_flags = PF_R | PF_W;
+ phdr.p_offset = kPageSize;
+ phdr.p_vaddr = 0x41000;
+ phdr.p_filesz = kPageSize / 2;
+ // The header is going to push vaddr up by a few hundred bytes. Keep p_memsz a
+ // bit less than 2 pages so this mapping doesn't extend beyond 0x43000.
+ phdr.p_memsz = 2 * kPageSize - kPageSize / 2;
+ elf.phdrs.push_back(phdr);
+
+ elf.UpdateOffsets();
+
+ TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf));
+
+ pid_t child;
+ int execve_errno;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+ ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno));
+ ASSERT_EQ(execve_errno, 0);
+
+ ASSERT_NO_ERRNO(WaitStopped(child));
+
+ EXPECT_THAT(
+ child, ContainsMappings(std::vector<ProcMapsEntry>({
+ // text page.
+ {0x40000, 0x41000, true, false, true, true, 0, 0, 0, 0,
+ file.path().c_str()},
+ // data + bss page from file.
+ {0x41000, 0x42000, true, true, false, true, kPageSize, 0, 0, 0,
+ file.path().c_str()},
+ // bss page from anon.
+ {0x42000, 0x43000, true, true, false, true, 0, 0, 0, 0, ""},
+ })));
+}
+
+// Linux will allow PT_LOAD segments to overlap.
+TEST(ElfTest, DirectlyOverlappingSegments) {
+ // NOTE: see PIEOutOfOrderSegments.
+ SKIP_IF(IsRunningOnGvisor());
+
+ ElfBinary<64> elf = StandardElf();
+
+ // Same as the StandardElf mapping.
+ decltype(elf)::ElfPhdr phdr = {};
+ phdr.p_type = PT_LOAD;
+ // Add PF_W so we can differentiate this mapping from the first.
+ phdr.p_flags = PF_R | PF_W | PF_X;
+ phdr.p_offset = 0;
+ phdr.p_vaddr = 0x40000;
+ phdr.p_filesz = sizeof(kPtraceCode);
+ phdr.p_memsz = phdr.p_filesz;
+ elf.phdrs.push_back(phdr);
+
+ elf.UpdateOffsets();
+
+ TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf));
+
+ pid_t child;
+ int execve_errno;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+ ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno));
+ ASSERT_EQ(execve_errno, 0);
+
+ ASSERT_NO_ERRNO(WaitStopped(child));
+
+ EXPECT_THAT(child, ContainsMappings(std::vector<ProcMapsEntry>({
+ {0x40000, 0x41000, true, true, true, true, 0, 0, 0, 0,
+ file.path().c_str()},
+ })));
+}
+
+// Linux allows out-of-order PT_LOAD segments.
+TEST(ElfTest, OutOfOrderSegments) {
+ // NOTE: see PIEOutOfOrderSegments.
+ SKIP_IF(IsRunningOnGvisor());
+
+ ElfBinary<64> elf = StandardElf();
+
+ decltype(elf)::ElfPhdr phdr = {};
+ phdr.p_type = PT_LOAD;
+ phdr.p_flags = PF_R | PF_X;
+ phdr.p_offset = 0;
+ phdr.p_vaddr = 0x20000;
+ phdr.p_filesz = sizeof(kPtraceCode);
+ phdr.p_memsz = phdr.p_filesz;
+ elf.phdrs.push_back(phdr);
+
+ elf.UpdateOffsets();
+
+ TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf));
+
+ pid_t child;
+ int execve_errno;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+ ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno));
+ ASSERT_EQ(execve_errno, 0);
+
+ ASSERT_NO_ERRNO(WaitStopped(child));
+
+ EXPECT_THAT(child, ContainsMappings(std::vector<ProcMapsEntry>({
+ {0x20000, 0x21000, true, false, true, true, 0, 0, 0, 0,
+ file.path().c_str()},
+ {0x40000, 0x41000, true, false, true, true, 0, 0, 0, 0,
+ file.path().c_str()},
+ })));
+}
+
+// header.e_phoff is bound the end of the file.
+TEST(ElfTest, OutOfBoundsPhdrs) {
+ ElfBinary<64> elf = StandardElf();
+ elf.header.e_phoff = 0x100000;
+ elf.UpdateOffsets();
+
+ TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf));
+
+ pid_t child;
+ int execve_errno;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+ ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno));
+ // On Linux 3.11, this caused EIO. On newer Linux, it causes ENOEXEC.
+ EXPECT_THAT(execve_errno, AnyOf(Eq(ENOEXEC), Eq(EIO)));
+}
+
+// Claim there is a phdr beyond the end of the file, but don't include it.
+TEST(ElfTest, MissingPhdr) {
+ ElfBinary<64> elf = StandardElf();
+
+ // Clear data so the file ends immediately after the phdrs.
+ // N.B. Per ElfTest.MissingData, StandardElf without data completes execve
+ // without error.
+ elf.data.clear();
+ elf.UpdateOffsets();
+
+ // Claim that there is another phdr just beyond the end of the file. Of
+ // course, it isn't accessible.
+ elf.header.e_phnum++;
+
+ TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf));
+
+ pid_t child;
+ int execve_errno;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+ ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno));
+ // On Linux 3.11, this caused EIO. On newer Linux, it causes ENOEXEC.
+ EXPECT_THAT(execve_errno, AnyOf(Eq(ENOEXEC), Eq(EIO)));
+}
+
+// No headers at all, just the ELF magic.
+TEST(ElfTest, MissingHeader) {
+ TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileMode(0755));
+ FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR));
+
+ const char kElfMagic[] = {0x7f, 'E', 'L', 'F'};
+
+ ASSERT_THAT(WriteFd(fd.get(), &kElfMagic, sizeof(kElfMagic)),
+ SyscallSucceeds());
+ fd.reset();
+
+ pid_t child;
+ int execve_errno;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+ ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno));
+ EXPECT_EQ(execve_errno, ENOEXEC);
+}
+
+// Load a PIE ELF with a data + bss segment.
+TEST(ElfTest, PIE) {
+ ElfBinary<64> elf = StandardElf();
+
+ elf.header.e_type = ET_DYN;
+
+ // Create a standard ELF, but extend to 1.5 pages. The second page will be the
+ // beginning of a multi-page data + bss segment.
+ elf.data.resize(kPageSize + kPageSize / 2);
+
+ elf.header.e_entry = 0x0;
+
+ decltype(elf)::ElfPhdr phdr = {};
+ phdr.p_type = PT_LOAD;
+ phdr.p_flags = PF_R | PF_W;
+ phdr.p_offset = kPageSize;
+ // Put the data segment at a bit of an offset.
+ phdr.p_vaddr = 0x20000;
+ phdr.p_filesz = kPageSize / 2;
+ // The header is going to push vaddr up by a few hundred bytes. Keep p_memsz a
+ // bit less than 2 pages so this mapping doesn't extend beyond 0x43000.
+ phdr.p_memsz = 2 * kPageSize - kPageSize / 2;
+ elf.phdrs.push_back(phdr);
+
+ elf.UpdateOffsets();
+
+ // The first segment really needs to start at 0 for a normal PIE binary, and
+ // thus includes the headers.
+ const uint64_t offset = elf.phdrs[1].p_offset;
+ elf.phdrs[1].p_offset = 0x0;
+ elf.phdrs[1].p_vaddr = 0x0;
+ elf.phdrs[1].p_filesz += offset;
+ elf.phdrs[1].p_memsz += offset;
+
+ TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf));
+
+ pid_t child;
+ int execve_errno;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+ ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno));
+ ASSERT_EQ(execve_errno, 0);
+
+ ASSERT_NO_ERRNO(WaitStopped(child));
+
+ // RIP tells us which page the first segment was loaded into.
+ struct user_regs_struct regs;
+ ASSERT_THAT(ptrace(PTRACE_GETREGS, child, 0, &regs), SyscallSucceeds());
+
+ const uint64_t load_addr = regs.rip & ~(kPageSize - 1);
+
+ EXPECT_THAT(child, ContainsMappings(std::vector<ProcMapsEntry>({
+ // text page.
+ {load_addr, load_addr + 0x1000, true, false, true,
+ true, 0, 0, 0, 0, file.path().c_str()},
+ // data + bss page from file.
+ {load_addr + 0x20000, load_addr + 0x21000, true, true,
+ false, true, kPageSize, 0, 0, 0, file.path().c_str()},
+ // bss page from anon.
+ {load_addr + 0x21000, load_addr + 0x22000, true, true,
+ false, true, 0, 0, 0, 0, ""},
+ })));
+}
+
+// PIE binary with a non-zero start address.
+//
+// This is non-standard for a PIE binary, but valid. The binary is still loaded
+// at an arbitrary address, not the first PT_LOAD vaddr.
+//
+// N.B. Linux changed this behavior in d1fd836dcf00d2028c700c7e44d2c23404062c90.
+// Previously, with "randomization" enabled, PIE binaries with a non-zero start
+// address would be be loaded at the address they specified because mmap was
+// passed the load address, which wasn't 0 as expected.
+//
+// This change is present in kernel v4.1+.
+TEST(ElfTest, PIENonZeroStart) {
+ // gVisor has the newer behavior.
+ if (!IsRunningOnGvisor()) {
+ auto version = ASSERT_NO_ERRNO_AND_VALUE(GetKernelVersion());
+ SKIP_IF(version.major < 4 || (version.major == 4 && version.minor < 1));
+ }
+
+ ElfBinary<64> elf = StandardElf();
+
+ elf.header.e_type = ET_DYN;
+
+ // Create a standard ELF, but extend to 1.5 pages. The second page will be the
+ // beginning of a multi-page data + bss segment.
+ elf.data.resize(kPageSize + kPageSize / 2);
+
+ decltype(elf)::ElfPhdr phdr = {};
+ phdr.p_type = PT_LOAD;
+ phdr.p_flags = PF_R | PF_W;
+ phdr.p_offset = kPageSize;
+ // Put the data segment at a bit of an offset.
+ phdr.p_vaddr = 0x60000;
+ phdr.p_filesz = kPageSize / 2;
+ // The header is going to push vaddr up by a few hundred bytes. Keep p_memsz a
+ // bit less than 2 pages so this mapping doesn't extend beyond 0x43000.
+ phdr.p_memsz = 2 * kPageSize - kPageSize / 2;
+ elf.phdrs.push_back(phdr);
+
+ elf.UpdateOffsets();
+
+ TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf));
+
+ pid_t child;
+ int execve_errno;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+ ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno));
+ ASSERT_EQ(execve_errno, 0);
+
+ ASSERT_NO_ERRNO(WaitStopped(child));
+
+ // RIP tells us which page the first segment was loaded into.
+ struct user_regs_struct regs;
+ ASSERT_THAT(ptrace(PTRACE_GETREGS, child, 0, &regs), SyscallSucceeds());
+
+ const uint64_t load_addr = regs.rip & ~(kPageSize - 1);
+
+ // The ELF is loaded at an arbitrary address, not the first PT_LOAD vaddr.
+ //
+ // N.B. this is technically flaky, but Linux is *extremely* unlikely to pick
+ // this as the start address, as it searches from the top down.
+ EXPECT_NE(load_addr, 0x40000);
+
+ EXPECT_THAT(child, ContainsMappings(std::vector<ProcMapsEntry>({
+ // text page.
+ {load_addr, load_addr + 0x1000, true, false, true,
+ true, 0, 0, 0, 0, file.path().c_str()},
+ // data + bss page from file.
+ {load_addr + 0x20000, load_addr + 0x21000, true, true,
+ false, true, kPageSize, 0, 0, 0, file.path().c_str()},
+ // bss page from anon.
+ {load_addr + 0x21000, load_addr + 0x22000, true, true,
+ false, true, 0, 0, 0, 0, ""},
+ })));
+}
+
+TEST(ElfTest, PIEOutOfOrderSegments) {
+ // TODO: This triggers a bug in Linux where it computes the size
+ // of the binary as 0x20000 - 0x40000 = 0xfffffffffffe0000, which obviously
+ // fails to map.
+ //
+ // We test gVisor's behavior (of rejecting the binary) because I assert that
+ // Linux is wrong and needs to be fixed.
+ SKIP_IF(!IsRunningOnGvisor());
+
+ ElfBinary<64> elf = StandardElf();
+
+ elf.header.e_type = ET_DYN;
+
+ // Create a standard ELF, but extend to 1.5 pages. The second page will be the
+ // beginning of a multi-page data + bss segment.
+ elf.data.resize(kPageSize + kPageSize / 2);
+
+ decltype(elf)::ElfPhdr phdr = {};
+ phdr.p_type = PT_LOAD;
+ phdr.p_flags = PF_R | PF_W;
+ phdr.p_offset = kPageSize;
+ // Put the data segment *before* the first segment.
+ phdr.p_vaddr = 0x20000;
+ phdr.p_filesz = kPageSize / 2;
+ // The header is going to push vaddr up by a few hundred bytes. Keep p_memsz a
+ // bit less than 2 pages so this mapping doesn't extend beyond 0x43000.
+ phdr.p_memsz = 2 * kPageSize - kPageSize / 2;
+ elf.phdrs.push_back(phdr);
+
+ elf.UpdateOffsets();
+
+ TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf));
+
+ pid_t child;
+ int execve_errno;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+ ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno));
+ EXPECT_EQ(execve_errno, ENOEXEC);
+}
+
+// Standard dynamically linked binary with an ELF interpreter.
+TEST(ElfTest, ELFInterpreter) {
+ ElfBinary<64> interpreter = StandardElf();
+ interpreter.header.e_type = ET_DYN;
+ interpreter.header.e_entry = 0x0;
+ interpreter.UpdateOffsets();
+
+ // The first segment really needs to start at 0 for a normal PIE binary, and
+ // thus includes the headers.
+ uint64_t const offset = interpreter.phdrs[1].p_offset;
+ interpreter.phdrs[1].p_offset = 0x0;
+ interpreter.phdrs[1].p_vaddr = 0x0;
+ interpreter.phdrs[1].p_filesz += offset;
+ interpreter.phdrs[1].p_memsz += offset;
+
+ TempPath interpreter_file =
+ ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(interpreter));
+
+ ElfBinary<64> binary = StandardElf();
+
+ // Append the interpreter path.
+ int const interp_data_start = binary.data.size();
+ for (char const c : interpreter_file.path()) {
+ binary.data.push_back(c);
+ }
+ // NUL-terminate.
+ binary.data.push_back(0);
+ int const interp_data_size = binary.data.size() - interp_data_start;
+
+ decltype(binary)::ElfPhdr phdr = {};
+ phdr.p_type = PT_INTERP;
+ phdr.p_offset = interp_data_start;
+ phdr.p_filesz = interp_data_size;
+ phdr.p_memsz = interp_data_size;
+ // "If [PT_INTERP] is present, it must precede any loadable segment entry."
+ //
+ // However, Linux allows it anywhere, so we just stick it at the end to make
+ // sure out-of-order PT_INTERP is OK.
+ binary.phdrs.push_back(phdr);
+
+ binary.UpdateOffsets();
+
+ TempPath binary_file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(binary));
+
+ pid_t child;
+ int execve_errno;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(ForkAndExec(
+ binary_file.path(), {binary_file.path()}, {}, &child, &execve_errno));
+ ASSERT_EQ(execve_errno, 0);
+
+ ASSERT_NO_ERRNO(WaitStopped(child));
+
+ // RIP tells us which page the first segment of the interpreter was loaded
+ // into.
+ struct user_regs_struct regs;
+ ASSERT_THAT(ptrace(PTRACE_GETREGS, child, 0, &regs), SyscallSucceeds());
+
+ const uint64_t interp_load_addr = regs.rip & ~(kPageSize - 1);
+
+ EXPECT_THAT(child,
+ ContainsMappings(std::vector<ProcMapsEntry>({
+ // Main binary
+ {0x40000, 0x41000, true, false, true, true, 0, 0, 0, 0,
+ binary_file.path().c_str()},
+ // Interpreter
+ {interp_load_addr, interp_load_addr + 0x1000, true, false,
+ true, true, 0, 0, 0, 0, interpreter_file.path().c_str()},
+ })));
+}
+
+// Test parameter to ElfInterpterStaticTest cases. The first item is a suffix to
+// add to the end of the interpreter path in the PT_INTERP segment and the
+// second is the expected execve(2) errno.
+using ElfInterpreterStaticParam = std::tuple<std::vector<char>, int>;
+
+class ElfInterpreterStaticTest
+ : public ::testing::TestWithParam<ElfInterpreterStaticParam> {};
+
+// Statically linked ELF with a statically linked ELF interpreter.
+TEST_P(ElfInterpreterStaticTest, Test) {
+ const std::vector<char> segment_suffix = std::get<0>(GetParam());
+ const int expected_errno = std::get<1>(GetParam());
+
+ ElfBinary<64> interpreter = StandardElf();
+ interpreter.UpdateOffsets();
+ TempPath interpreter_file =
+ ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(interpreter));
+
+ ElfBinary<64> binary = StandardElf();
+ // The PT_LOAD segment conflicts with the interpreter's PT_LOAD segment. The
+ // interpreter's will be mapped directly over the binary's.
+
+ // Interpreter path plus the parameterized suffix in the PT_INTERP segment.
+ const std::string path = interpreter_file.path();
+ std::vector<char> segment(path.begin(), path.end());
+ segment.insert(segment.end(), segment_suffix.begin(), segment_suffix.end());
+ binary.AddInterpreter(segment);
+
+ binary.UpdateOffsets();
+
+ TempPath binary_file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(binary));
+
+ pid_t child;
+ int execve_errno;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(ForkAndExec(
+ binary_file.path(), {binary_file.path()}, {}, &child, &execve_errno));
+ ASSERT_EQ(execve_errno, expected_errno);
+
+ if (expected_errno == 0) {
+ ASSERT_NO_ERRNO(WaitStopped(child));
+
+ EXPECT_THAT(child, ContainsMappings(std::vector<ProcMapsEntry>({
+ // Interpreter.
+ {0x40000, 0x41000, true, false, true, true, 0, 0, 0,
+ 0, interpreter_file.path().c_str()},
+ })));
+ }
+}
+
+INSTANTIATE_TEST_CASE_P(
+ Cases, ElfInterpreterStaticTest,
+ ::testing::ValuesIn({
+ // Simple NUL-terminator to run the interpreter as normal.
+ std::make_tuple(std::vector<char>({'\0'}), 0),
+ // Add some garbage to the segment followed by a NUL-terminator. This is
+ // ignored.
+ std::make_tuple(std::vector<char>({'\0', 'b', '\0'}), 0),
+ // Add some garbage to the segment without a NUL-terminator. Linux will
+ // reject
+ // this.
+ std::make_tuple(std::vector<char>({'\0', 'b'}), ENOEXEC),
+ }));
+
+// Test parameter to ElfInterpterBadPathTest cases. The first item is the
+// contents of the PT_INTERP segment and the second is the expected execve(2)
+// errno.
+using ElfInterpreterBadPathParam = std::tuple<std::vector<char>, int>;
+
+class ElfInterpreterBadPathTest
+ : public ::testing::TestWithParam<ElfInterpreterBadPathParam> {};
+
+TEST_P(ElfInterpreterBadPathTest, Test) {
+ const std::vector<char> segment = std::get<0>(GetParam());
+ const int expected_errno = std::get<1>(GetParam());
+
+ ElfBinary<64> binary = StandardElf();
+ binary.AddInterpreter(segment);
+ binary.UpdateOffsets();
+
+ TempPath binary_file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(binary));
+
+ int execve_errno;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(ForkAndExec(
+ binary_file.path(), {binary_file.path()}, {}, nullptr, &execve_errno));
+ EXPECT_EQ(execve_errno, expected_errno);
+}
+
+INSTANTIATE_TEST_CASE_P(
+ Cases, ElfInterpreterBadPathTest,
+ ::testing::ValuesIn({
+ // NUL-terminated fake path in the PT_INTERP segment.
+ std::make_tuple(std::vector<char>({'/', 'f', '/', 'b', '\0'}), ENOENT),
+ // ELF interpreter not NUL-terminated.
+ std::make_tuple(std::vector<char>({'/', 'f', '/', 'b'}), ENOEXEC),
+ // ELF interpreter path omitted entirely.
+ //
+ // fs/binfmt_elf.c:load_elf_binary returns ENOEXEC if p_filesz is < 2
+ // bytes.
+ std::make_tuple(std::vector<char>({'\0'}), ENOEXEC),
+ // ELF interpreter path = "\0".
+ //
+ // fs/binfmt_elf.c:load_elf_binary returns ENOEXEC if p_filesz is < 2
+ // bytes, so add an extra byte to pass that check.
+ //
+ // load_elf_binary -> open_exec -> do_open_execat fails to check that
+ // name != '\0' before calling do_filp_open, which thus opens the
+ // working directory. do_open_execat returns EACCES because the
+ // directory is not a regular file.
+ std::make_tuple(std::vector<char>({'\0', '\0'}), EACCES),
+ }));
+
+// Relative path to ELF interpreter.
+TEST(ElfTest, ELFInterpreterRelative) {
+ ElfBinary<64> interpreter = StandardElf();
+ interpreter.header.e_type = ET_DYN;
+ interpreter.header.e_entry = 0x0;
+ interpreter.UpdateOffsets();
+
+ // The first segment really needs to start at 0 for a normal PIE binary, and
+ // thus includes the headers.
+ uint64_t const offset = interpreter.phdrs[1].p_offset;
+ interpreter.phdrs[1].p_offset = 0x0;
+ interpreter.phdrs[1].p_vaddr = 0x0;
+ interpreter.phdrs[1].p_filesz += offset;
+ interpreter.phdrs[1].p_memsz += offset;
+
+ TempPath interpreter_file =
+ ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(interpreter));
+ auto cwd = ASSERT_NO_ERRNO_AND_VALUE(GetCWD());
+ auto interpreter_relative =
+ ASSERT_NO_ERRNO_AND_VALUE(GetRelativePath(cwd, interpreter_file.path()));
+
+ ElfBinary<64> binary = StandardElf();
+
+ // NUL-terminated path in the PT_INTERP segment.
+ std::vector<char> segment(interpreter_relative.begin(),
+ interpreter_relative.end());
+ segment.push_back(0);
+ binary.AddInterpreter(segment);
+
+ binary.UpdateOffsets();
+
+ TempPath binary_file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(binary));
+
+ pid_t child;
+ int execve_errno;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(ForkAndExec(
+ binary_file.path(), {binary_file.path()}, {}, &child, &execve_errno));
+ ASSERT_EQ(execve_errno, 0);
+
+ ASSERT_NO_ERRNO(WaitStopped(child));
+
+ // RIP tells us which page the first segment of the interpreter was loaded
+ // into.
+ struct user_regs_struct regs;
+ ASSERT_THAT(ptrace(PTRACE_GETREGS, child, 0, &regs), SyscallSucceeds());
+
+ const uint64_t interp_load_addr = regs.rip & ~(kPageSize - 1);
+
+ EXPECT_THAT(child,
+ ContainsMappings(std::vector<ProcMapsEntry>({
+ // Main binary
+ {0x40000, 0x41000, true, false, true, true, 0, 0, 0, 0,
+ binary_file.path().c_str()},
+ // Interpreter
+ {interp_load_addr, interp_load_addr + 0x1000, true, false,
+ true, true, 0, 0, 0, 0, interpreter_file.path().c_str()},
+ })));
+}
+
+// ELF interpreter architecture doesn't match the binary.
+TEST(ElfTest, ELFInterpreterWrongArch) {
+ ElfBinary<64> interpreter = StandardElf();
+ interpreter.header.e_machine = EM_PPC64;
+ interpreter.header.e_type = ET_DYN;
+ interpreter.header.e_entry = 0x0;
+ interpreter.UpdateOffsets();
+
+ // The first segment really needs to start at 0 for a normal PIE binary, and
+ // thus includes the headers.
+ uint64_t const offset = interpreter.phdrs[1].p_offset;
+ interpreter.phdrs[1].p_offset = 0x0;
+ interpreter.phdrs[1].p_vaddr = 0x0;
+ interpreter.phdrs[1].p_filesz += offset;
+ interpreter.phdrs[1].p_memsz += offset;
+
+ TempPath interpreter_file =
+ ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(interpreter));
+
+ ElfBinary<64> binary = StandardElf();
+
+ // NUL-terminated path in the PT_INTERP segment.
+ const std::string path = interpreter_file.path();
+ std::vector<char> segment(path.begin(), path.end());
+ segment.push_back(0);
+ binary.AddInterpreter(segment);
+
+ binary.UpdateOffsets();
+
+ TempPath binary_file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(binary));
+
+ pid_t child;
+ int execve_errno;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(ForkAndExec(
+ binary_file.path(), {binary_file.path()}, {}, &child, &execve_errno));
+ ASSERT_EQ(execve_errno, ELIBBAD);
+}
+
+// No execute permissions on the binary.
+TEST(ElfTest, NoExecute) {
+ ElfBinary<64> elf = StandardElf();
+ elf.UpdateOffsets();
+
+ TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf));
+
+ ASSERT_THAT(chmod(file.path().c_str(), 0644), SyscallSucceeds());
+
+ pid_t child;
+ int execve_errno;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+ ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno));
+ EXPECT_EQ(execve_errno, EACCES);
+}
+
+// Execute, but no read permissions on the binary works just fine.
+TEST(ElfTest, NoRead) {
+ // TODO: gVisor's backing filesystem may prevent the sentry from
+ // reading the executable.
+ SKIP_IF(IsRunningOnGvisor());
+
+ ElfBinary<64> elf = StandardElf();
+ elf.UpdateOffsets();
+
+ TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf));
+
+ ASSERT_THAT(chmod(file.path().c_str(), 0111), SyscallSucceeds());
+
+ pid_t child;
+ int execve_errno;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+ ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno));
+ ASSERT_EQ(execve_errno, 0);
+
+ ASSERT_NO_ERRNO(WaitStopped(child));
+
+ // TODO: A task with a non-readable executable is marked
+ // non-dumpable, preventing access to proc files. gVisor does not implement
+ // this behavior.
+}
+
+// No execute permissions on the ELF interpreter.
+TEST(ElfTest, ElfInterpreterNoExecute) {
+ ElfBinary<64> interpreter = StandardElf();
+ interpreter.header.e_type = ET_DYN;
+ interpreter.header.e_entry = 0x0;
+ interpreter.UpdateOffsets();
+
+ // The first segment really needs to start at 0 for a normal PIE binary, and
+ // thus includes the headers.
+ uint64_t const offset = interpreter.phdrs[1].p_offset;
+ interpreter.phdrs[1].p_offset = 0x0;
+ interpreter.phdrs[1].p_vaddr = 0x0;
+ interpreter.phdrs[1].p_filesz += offset;
+ interpreter.phdrs[1].p_memsz += offset;
+
+ TempPath interpreter_file =
+ ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(interpreter));
+
+ ElfBinary<64> binary = StandardElf();
+
+ // NUL-terminated path in the PT_INTERP segment.
+ const std::string path = interpreter_file.path();
+ std::vector<char> segment(path.begin(), path.end());
+ segment.push_back(0);
+ binary.AddInterpreter(segment);
+
+ binary.UpdateOffsets();
+
+ TempPath binary_file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(binary));
+
+ ASSERT_THAT(chmod(interpreter_file.path().c_str(), 0644), SyscallSucceeds());
+
+ pid_t child;
+ int execve_errno;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+ ForkAndExec(interpreter_file.path(), {interpreter_file.path()}, {},
+ &child, &execve_errno));
+ EXPECT_EQ(execve_errno, EACCES);
+}
+
+// Execute a basic interpreter script.
+TEST(InterpreterScriptTest, Execute) {
+ ElfBinary<64> elf = StandardElf();
+ elf.UpdateOffsets();
+ // Use /tmp explicitly to ensure the path is short enough.
+ TempPath binary = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith("/tmp", elf));
+
+ TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), absl::StrCat("#!", binary.path()), 0755));
+
+ pid_t child;
+ int execve_errno;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+ ForkAndExec(script.path(), {script.path()}, {}, &child, &execve_errno));
+ ASSERT_EQ(execve_errno, 0);
+
+ EXPECT_NO_ERRNO(WaitStopped(child));
+}
+
+// Whitespace after #!.
+TEST(InterpreterScriptTest, Whitespace) {
+ ElfBinary<64> elf = StandardElf();
+ elf.UpdateOffsets();
+ // Use /tmp explicitly to ensure the path is short enough.
+ TempPath binary = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith("/tmp", elf));
+
+ TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), absl::StrCat("#! \t \t", binary.path()), 0755));
+
+ pid_t child;
+ int execve_errno;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+ ForkAndExec(script.path(), {script.path()}, {}, &child, &execve_errno));
+ ASSERT_EQ(execve_errno, 0);
+
+ EXPECT_NO_ERRNO(WaitStopped(child));
+}
+
+// Interpreter script is missing execute permission.
+TEST(InterpreterScriptTest, InterpreterScriptNoExecute) {
+ ElfBinary<64> elf = StandardElf();
+ elf.UpdateOffsets();
+ // Use /tmp explicitly to ensure the path is short enough.
+ TempPath binary = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith("/tmp", elf));
+
+ TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), absl::StrCat("#!", binary.path()), 0644));
+
+ pid_t child;
+ int execve_errno;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+ ForkAndExec(script.path(), {script.path()}, {}, &child, &execve_errno));
+ ASSERT_EQ(execve_errno, EACCES);
+}
+
+// Binary interpreter script refers to is missing execute permission.
+TEST(InterpreterScriptTest, BinaryNoExecute) {
+ ElfBinary<64> elf = StandardElf();
+ elf.UpdateOffsets();
+ // Use /tmp explicitly to ensure the path is short enough.
+ TempPath binary = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith("/tmp", elf));
+
+ ASSERT_THAT(chmod(binary.path().c_str(), 0644), SyscallSucceeds());
+
+ TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), absl::StrCat("#!", binary.path()), 0755));
+
+ pid_t child;
+ int execve_errno;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+ ForkAndExec(script.path(), {script.path()}, {}, &child, &execve_errno));
+ ASSERT_EQ(execve_errno, EACCES);
+}
+
+// Linux will load interpreter scripts five levels deep, but no more.
+TEST(InterpreterScriptTest, MaxRecursion) {
+ ElfBinary<64> elf = StandardElf();
+ elf.UpdateOffsets();
+ // Use /tmp explicitly to ensure the path is short enough.
+ TempPath binary = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith("/tmp", elf));
+
+ TempPath script1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ "/tmp", absl::StrCat("#!", binary.path()), 0755));
+ TempPath script2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ "/tmp", absl::StrCat("#!", script1.path()), 0755));
+ TempPath script3 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ "/tmp", absl::StrCat("#!", script2.path()), 0755));
+ TempPath script4 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ "/tmp", absl::StrCat("#!", script3.path()), 0755));
+ TempPath script5 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ "/tmp", absl::StrCat("#!", script4.path()), 0755));
+ TempPath script6 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ "/tmp", absl::StrCat("#!", script5.path()), 0755));
+
+ pid_t child;
+ int execve_errno;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+ ForkAndExec(script6.path(), {script6.path()}, {}, &child, &execve_errno));
+ // Too many levels of recursion.
+ EXPECT_EQ(execve_errno, ELOOP);
+
+ // The next level up is OK.
+ auto cleanup2 = ASSERT_NO_ERRNO_AND_VALUE(
+ ForkAndExec(script5.path(), {script5.path()}, {}, &child, &execve_errno));
+ ASSERT_EQ(execve_errno, 0);
+
+ EXPECT_NO_ERRNO(WaitStopped(child));
+}
+
+// Interpreter script with a relative path.
+TEST(InterpreterScriptTest, RelativePath) {
+ ElfBinary<64> elf = StandardElf();
+ elf.UpdateOffsets();
+ TempPath binary = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith("/tmp", elf));
+
+ auto cwd = ASSERT_NO_ERRNO_AND_VALUE(GetCWD());
+ auto binary_relative =
+ ASSERT_NO_ERRNO_AND_VALUE(GetRelativePath(cwd, binary.path()));
+
+ TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), absl::StrCat("#!", binary_relative), 0755));
+
+ pid_t child;
+ int execve_errno;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+ ForkAndExec(script.path(), {script.path()}, {}, &child, &execve_errno));
+ ASSERT_EQ(execve_errno, 0);
+
+ EXPECT_NO_ERRNO(WaitStopped(child));
+}
+
+// Interpreter script with .. in a path component.
+TEST(InterpreterScriptTest, UncleanPath) {
+ ElfBinary<64> elf = StandardElf();
+ elf.UpdateOffsets();
+ // Use /tmp explicitly to ensure the path is short enough.
+ TempPath binary = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith("/tmp", elf));
+
+ TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), absl::StrCat("#!/tmp/../", binary.path()),
+ 0755));
+
+ pid_t child;
+ int execve_errno;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+ ForkAndExec(script.path(), {script.path()}, {}, &child, &execve_errno));
+ ASSERT_EQ(execve_errno, 0);
+
+ EXPECT_NO_ERRNO(WaitStopped(child));
+}
+
+// Passed interpreter script is a symlink.
+TEST(InterpreterScriptTest, Symlink) {
+ ElfBinary<64> elf = StandardElf();
+ elf.UpdateOffsets();
+ // Use /tmp explicitly to ensure the path is short enough.
+ TempPath binary = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith("/tmp", elf));
+
+ TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), absl::StrCat("#!", binary.path()), 0755));
+
+ TempPath link = ASSERT_NO_ERRNO_AND_VALUE(
+ TempPath::CreateSymlinkTo(GetAbsoluteTestTmpdir(), script.path()));
+
+ pid_t child;
+ int execve_errno;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+ ForkAndExec(link.path(), {link.path()}, {}, &child, &execve_errno));
+ ASSERT_EQ(execve_errno, 0);
+
+ EXPECT_NO_ERRNO(WaitStopped(child));
+}
+
+// Interpreter script points to a symlink loop.
+TEST(InterpreterScriptTest, SymlinkLoop) {
+ std::string const link1 = NewTempAbsPathInDir("/tmp");
+ std::string const link2 = NewTempAbsPathInDir("/tmp");
+
+ ASSERT_THAT(symlink(link2.c_str(), link1.c_str()), SyscallSucceeds());
+ auto remove_link1 = Cleanup(
+ [&link1] { EXPECT_THAT(unlink(link1.c_str()), SyscallSucceeds()); });
+
+ ASSERT_THAT(symlink(link1.c_str(), link2.c_str()), SyscallSucceeds());
+ auto remove_link2 = Cleanup(
+ [&link2] { EXPECT_THAT(unlink(link2.c_str()), SyscallSucceeds()); });
+
+ TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), absl::StrCat("#!", link1), 0755));
+
+ pid_t child;
+ int execve_errno;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+ ForkAndExec(script.path(), {script.path()}, {}, &child, &execve_errno));
+ EXPECT_EQ(execve_errno, ELOOP);
+}
+
+// Binary is a symlink loop.
+TEST(ExecveTest, SymlinkLoop) {
+ std::string const link1 = NewTempAbsPathInDir("/tmp");
+ std::string const link2 = NewTempAbsPathInDir("/tmp");
+
+ ASSERT_THAT(symlink(link2.c_str(), link1.c_str()), SyscallSucceeds());
+ auto remove_link = Cleanup(
+ [&link1] { EXPECT_THAT(unlink(link1.c_str()), SyscallSucceeds()); });
+
+ ASSERT_THAT(symlink(link1.c_str(), link2.c_str()), SyscallSucceeds());
+ auto remove_link2 = Cleanup(
+ [&link2] { EXPECT_THAT(unlink(link2.c_str()), SyscallSucceeds()); });
+
+ pid_t child;
+ int execve_errno;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+ ForkAndExec(link1, {link1}, {}, &child, &execve_errno));
+ EXPECT_EQ(execve_errno, ELOOP);
+}
+
+// Binary is a directory.
+TEST(ExecveTest, Directory) {
+ pid_t child;
+ int execve_errno;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+ ForkAndExec("/tmp", {"/tmp"}, {}, &child, &execve_errno));
+ EXPECT_EQ(execve_errno, EACCES);
+}
+
+// Pass a valid binary as a directory (extra / on the end).
+TEST(ExecveTest, BinaryAsDirectory) {
+ ElfBinary<64> elf = StandardElf();
+ elf.UpdateOffsets();
+ TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf));
+
+ std::string const path = absl::StrCat(file.path(), "/");
+
+ pid_t child;
+ int execve_errno;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+ ForkAndExec(path, {path}, {}, &child, &execve_errno));
+ EXPECT_EQ(execve_errno, ENOTDIR);
+}
+
+// The initial brk value is after the page at the end of the binary.
+TEST(ExecveTest, BrkAfterBinary) {
+ ElfBinary<64> elf = StandardElf();
+ elf.UpdateOffsets();
+
+ TempPath file = ASSERT_NO_ERRNO_AND_VALUE(CreateElfWith(elf));
+
+ pid_t child;
+ int execve_errno;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+ ForkAndExec(file.path(), {file.path()}, {}, &child, &execve_errno));
+ ASSERT_EQ(execve_errno, 0);
+
+ // Ensure it made it to SIGSTOP.
+ ASSERT_NO_ERRNO(WaitStopped(child));
+
+ struct user_regs_struct regs;
+ ASSERT_THAT(ptrace(PTRACE_GETREGS, child, 0, &regs), SyscallSucceeds());
+
+ // RIP is just beyond the final syscall instruction. Rewind to execute a brk
+ // syscall.
+ regs.rip -= kSyscallSize;
+ regs.rax = __NR_brk;
+ regs.rdi = 0;
+ ASSERT_THAT(ptrace(PTRACE_SETREGS, child, 0, &regs), SyscallSucceeds());
+
+ // Resume the child, waiting for syscall entry.
+ ASSERT_THAT(ptrace(PTRACE_SYSCALL, child, 0, 0), SyscallSucceeds());
+ int status;
+ ASSERT_THAT(RetryEINTR(waitpid)(child, &status, 0),
+ SyscallSucceedsWithValue(child));
+ ASSERT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP)
+ << "status = " << status;
+
+ // Execute the syscall.
+ ASSERT_THAT(ptrace(PTRACE_SYSCALL, child, 0, 0), SyscallSucceeds());
+ ASSERT_THAT(RetryEINTR(waitpid)(child, &status, 0),
+ SyscallSucceedsWithValue(child));
+ ASSERT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP)
+ << "status = " << status;
+
+ ASSERT_THAT(ptrace(PTRACE_GETREGS, child, 0, &regs), SyscallSucceeds());
+
+ // brk is after the text page.
+ //
+ // The kernel does brk randomization, so we can't be sure what the exact
+ // address will be, but it is always beyond the final page in the binary.
+ // i.e., it does not start immediately after memsz in the middle of a page.
+ // Userspace may expect to use that space.
+ EXPECT_GE(regs.rax, 0x41000);
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/exec_proc_exe_workload.cc b/test/syscalls/linux/exec_proc_exe_workload.cc
new file mode 100644
index 000000000..b9a4ac749
--- /dev/null
+++ b/test/syscalls/linux/exec_proc_exe_workload.cc
@@ -0,0 +1,35 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <iostream>
+
+#include "test/util/fs_util.h"
+#include "test/util/posix_error.h"
+
+int main(int argc, char** argv, char** envp) {
+ std::string exe = gvisor::testing::ProcessExePath(getpid()).ValueOrDie();
+ if (exe[0] != '/') {
+ std::cerr << "relative path: " << exe << std::endl;
+ exit(1);
+ }
+ if (exe.find(argv[1]) != std::string::npos) {
+ std::cerr << "matching path: " << exe << std::endl;
+ exit(1);
+ }
+
+ return 0;
+}
diff --git a/test/syscalls/linux/exec_state_workload.cc b/test/syscalls/linux/exec_state_workload.cc
new file mode 100644
index 000000000..b66e22565
--- /dev/null
+++ b/test/syscalls/linux/exec_state_workload.cc
@@ -0,0 +1,202 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <signal.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <sys/time.h>
+#include <iostream>
+#include <ostream>
+#include <string>
+
+// Pretty-print a sigset_t.
+std::ostream& operator<<(std::ostream& out, const sigset_t& s) {
+ out << "{ ";
+
+ for (int i = 0; i < NSIG; i++) {
+ if (sigismember(&s, i)) {
+ out << i << " ";
+ }
+ }
+
+ out << "}";
+ return out;
+}
+
+// Verify that the signo handler is handler.
+int CheckSigHandler(uint32_t signo, uintptr_t handler) {
+ struct sigaction sa;
+ int ret = sigaction(signo, nullptr, &sa);
+ if (ret < 0) {
+ perror("sigaction");
+ return 1;
+ }
+
+ if (reinterpret_cast<void (*)(int)>(handler) != sa.sa_handler) {
+ std::cerr << "signo " << signo << " handler got: " << sa.sa_handler
+ << " expected: " << std::hex << handler;
+ return 1;
+ }
+ return 0;
+}
+
+// Verify that the signo is blocked.
+int CheckSigBlocked(uint32_t signo) {
+ sigset_t s;
+ int ret = sigprocmask(SIG_SETMASK, nullptr, &s);
+ if (ret < 0) {
+ perror("sigprocmask");
+ return 1;
+ }
+
+ if (!sigismember(&s, signo)) {
+ std::cerr << "signal " << signo << " not blocked in signal mask: " << s
+ << std::endl;
+ return 1;
+ }
+ return 0;
+}
+
+// Verify that the itimer is enabled.
+int CheckItimerEnabled(uint32_t timer) {
+ struct itimerval itv;
+ int ret = getitimer(timer, &itv);
+ if (ret < 0) {
+ perror("getitimer");
+ return 1;
+ }
+
+ if (!itv.it_value.tv_sec && !itv.it_value.tv_usec &&
+ !itv.it_interval.tv_sec && !itv.it_interval.tv_usec) {
+ std::cerr << "timer " << timer
+ << " not enabled. value sec: " << itv.it_value.tv_sec
+ << " usec: " << itv.it_value.tv_usec
+ << " interval sec: " << itv.it_interval.tv_sec
+ << " usec: " << itv.it_interval.tv_usec << std::endl;
+ return 1;
+ }
+ return 0;
+}
+
+int PrintExecFn() {
+ unsigned long execfn = getauxval(AT_EXECFN);
+ if (!execfn) {
+ std::cerr << "AT_EXECFN missing" << std::endl;
+ return 1;
+ }
+
+ std::cerr << reinterpret_cast<const char*>(execfn) << std::endl;
+ return 0;
+}
+
+int PrintExecName() {
+ const size_t name_length = 20;
+ char name[name_length] = {0};
+ if (prctl(PR_GET_NAME, name) < 0) {
+ std::cerr << "prctl(PR_GET_NAME) failed" << std::endl;
+ return 1;
+ }
+
+ std::cerr << name << std::endl;
+ return 0;
+}
+
+void usage(const std::string& prog) {
+ std::cerr << "usage:\n"
+ << "\t" << prog << " CheckSigHandler <signo> <handler addr (hex)>\n"
+ << "\t" << prog << " CheckSigBlocked <signo>\n"
+ << "\t" << prog << " CheckTimerDisabled <timer>\n"
+ << "\t" << prog << " PrintExecFn\n"
+ << "\t" << prog << " PrintExecName" << std::endl;
+}
+
+int main(int argc, char** argv) {
+ if (argc < 2) {
+ usage(argv[0]);
+ return 1;
+ }
+
+ std::string func(argv[1]);
+
+ if (func == "CheckSigHandler") {
+ if (argc != 4) {
+ usage(argv[0]);
+ return 1;
+ }
+
+ char* end;
+ uint32_t signo = strtoul(argv[2], &end, 10);
+ if (end == argv[2]) {
+ std::cerr << "invalid signo: " << argv[2] << std::endl;
+ return 1;
+ }
+
+ uintptr_t handler = strtoull(argv[3], &end, 16);
+ if (end == argv[3]) {
+ std::cerr << "invalid handler: " << std::hex << argv[3] << std::endl;
+ return 1;
+ }
+
+ return CheckSigHandler(signo, handler);
+ }
+
+ if (func == "CheckSigBlocked") {
+ if (argc != 3) {
+ usage(argv[0]);
+ return 1;
+ }
+
+ char* end;
+ uint32_t signo = strtoul(argv[2], &end, 10);
+ if (end == argv[2]) {
+ std::cerr << "invalid signo: " << argv[2] << std::endl;
+ return 1;
+ }
+
+ return CheckSigBlocked(signo);
+ }
+
+ if (func == "CheckItimerEnabled") {
+ if (argc != 3) {
+ usage(argv[0]);
+ return 1;
+ }
+
+ char* end;
+ uint32_t timer = strtoul(argv[2], &end, 10);
+ if (end == argv[2]) {
+ std::cerr << "invalid signo: " << argv[2] << std::endl;
+ return 1;
+ }
+
+ return CheckItimerEnabled(timer);
+ }
+
+ if (func == "PrintExecFn") {
+ // N.B. This will be called as an interpreter script, with the script passed
+ // as the third argument. We don't care about that script.
+ return PrintExecFn();
+ }
+
+ if (func == "PrintExecName") {
+ // N.B. This may be called as an interpreter script like PrintExecFn.
+ return PrintExecName();
+ }
+
+ std::cerr << "Invalid function: " << func << std::endl;
+ return 1;
+}
diff --git a/test/syscalls/linux/exit.cc b/test/syscalls/linux/exit.cc
new file mode 100644
index 000000000..7246a7b3b
--- /dev/null
+++ b/test/syscalls/linux/exit.cc
@@ -0,0 +1,77 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "absl/time/time.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+void TestExit(int code) {
+ pid_t pid = fork();
+ if (pid == 0) {
+ _exit(code);
+ }
+
+ ASSERT_THAT(pid, SyscallSucceeds());
+
+ int status;
+ EXPECT_THAT(RetryEINTR(waitpid)(pid, &status, 0), SyscallSucceeds());
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == code) << status;
+}
+
+TEST(ExitTest, Success) { TestExit(0); }
+
+TEST(ExitTest, Failure) { TestExit(1); }
+
+// This test ensures that a process's file descriptors are closed when it calls
+// exit(). In order to test this, the parent tries to read from a pipe whose
+// write end is held by the child. While the read is blocking, the child exits,
+// which should cause the parent to read 0 bytes due to EOF.
+TEST(ExitTest, CloseFds) {
+ int pipe_fds[2];
+ ASSERT_THAT(pipe(pipe_fds), SyscallSucceeds());
+
+ FileDescriptor read_fd(pipe_fds[0]);
+ FileDescriptor write_fd(pipe_fds[1]);
+
+ pid_t pid = fork();
+ if (pid == 0) {
+ read_fd.reset();
+
+ SleepSafe(absl::Seconds(10));
+
+ _exit(0);
+ }
+
+ EXPECT_THAT(pid, SyscallSucceeds());
+
+ write_fd.reset();
+
+ char buf[10];
+ EXPECT_THAT(ReadFd(read_fd.get(), buf, sizeof(buf)),
+ SyscallSucceedsWithValue(0));
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/exit_script.sh b/test/syscalls/linux/exit_script.sh
new file mode 100755
index 000000000..f014fcf99
--- /dev/null
+++ b/test/syscalls/linux/exit_script.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+# Copyright 2018 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+if [ $# -ne 1 ]; then
+ echo "Usage: $0 exit_code"
+ exit 255
+fi
+
+exit $1
diff --git a/test/syscalls/linux/fadvise64.cc b/test/syscalls/linux/fadvise64.cc
new file mode 100644
index 000000000..041e8b7b6
--- /dev/null
+++ b/test/syscalls/linux/fadvise64.cc
@@ -0,0 +1,72 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <syscall.h>
+#include <unistd.h>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+TEST(FAdvise64Test, Basic) {
+ auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ const auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY));
+
+ // fadvise64 is noop in gVisor, so just test that it succeeds.
+ ASSERT_THAT(syscall(__NR_fadvise64, fd.get(), 0, 10, POSIX_FADV_NORMAL),
+ SyscallSucceeds());
+ ASSERT_THAT(syscall(__NR_fadvise64, fd.get(), 0, 10, POSIX_FADV_RANDOM),
+ SyscallSucceeds());
+ ASSERT_THAT(syscall(__NR_fadvise64, fd.get(), 0, 10, POSIX_FADV_SEQUENTIAL),
+ SyscallSucceeds());
+ ASSERT_THAT(syscall(__NR_fadvise64, fd.get(), 0, 10, POSIX_FADV_WILLNEED),
+ SyscallSucceeds());
+ ASSERT_THAT(syscall(__NR_fadvise64, fd.get(), 0, 10, POSIX_FADV_DONTNEED),
+ SyscallSucceeds());
+ ASSERT_THAT(syscall(__NR_fadvise64, fd.get(), 0, 10, POSIX_FADV_NOREUSE),
+ SyscallSucceeds());
+}
+
+TEST(FAdvise64Test, InvalidArgs) {
+ auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ const auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY));
+
+ // Note: offset is allowed to be negative.
+ ASSERT_THAT(syscall(__NR_fadvise64, fd.get(), 0, static_cast<off_t>(-1),
+ POSIX_FADV_NORMAL),
+ SyscallFailsWithErrno(EINVAL));
+ ASSERT_THAT(syscall(__NR_fadvise64, fd.get(), 0, 10, 12345),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(FAdvise64Test, NoPipes) {
+ int fds[2];
+ ASSERT_THAT(pipe(fds), SyscallSucceeds());
+ const FileDescriptor read(fds[0]);
+ const FileDescriptor write(fds[1]);
+
+ ASSERT_THAT(syscall(__NR_fadvise64, read.get(), 0, 10, POSIX_FADV_NORMAL),
+ SyscallFailsWithErrno(ESPIPE));
+}
+
+} // namespace
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/fallocate.cc b/test/syscalls/linux/fallocate.cc
new file mode 100644
index 000000000..53aedd4e4
--- /dev/null
+++ b/test/syscalls/linux/fallocate.cc
@@ -0,0 +1,57 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <sys/eventfd.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// These tests are very rudimentary because fallocate is not
+// implemented. We just want to make sure the expected error codes are
+// returned.
+
+TEST(FallocateTest, NotImplemented) {
+ auto temp_path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(temp_path.path(), O_RDWR));
+
+ // Test that a completely unassigned fallocate mode returns EOPNOTSUPP.
+ ASSERT_THAT(fallocate(fd.get(), 0x80, 0, 32768),
+ SyscallFailsWithErrno(EOPNOTSUPP));
+}
+
+TEST(FallocateTest, BadOffset) {
+ auto temp_path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(temp_path.path(), O_RDWR));
+ ASSERT_THAT(fallocate(fd.get(), 0, -1, 32768), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(FallocateTest, BadLength) {
+ auto temp_path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(temp_path.path(), O_RDWR));
+ ASSERT_THAT(fallocate(fd.get(), 0, 0, -1), SyscallFailsWithErrno(EINVAL));
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/fault.cc b/test/syscalls/linux/fault.cc
new file mode 100644
index 000000000..cfa7d0d1f
--- /dev/null
+++ b/test/syscalls/linux/fault.cc
@@ -0,0 +1,71 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#define _GNU_SOURCE 1
+#include <signal.h>
+#include <ucontext.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+__attribute__((noinline)) void Fault(void) {
+ volatile int* foo = nullptr;
+ *foo = 0;
+}
+
+int GetPcFromUcontext(ucontext_t* uc, uintptr_t* pc) {
+#if defined(__x86_64__)
+ *pc = uc->uc_mcontext.gregs[REG_RIP];
+ return 1;
+#elif defined(__i386__)
+ *pc = uc->uc_mcontext.gregs[REG_EIP];
+ return 1;
+#else
+ return 0;
+#endif
+}
+
+void sigact_handler(int sig, siginfo_t* siginfo, void* context) {
+ uintptr_t pc;
+ if (GetPcFromUcontext(reinterpret_cast<ucontext_t*>(context), &pc)) {
+ /* Expect Fault() to be at most 64 bytes in size. */
+ uintptr_t fault_addr = reinterpret_cast<uintptr_t>(&Fault);
+ EXPECT_GE(pc, fault_addr);
+ EXPECT_LT(pc, fault_addr + 64);
+ exit(0);
+ }
+}
+
+TEST(FaultTest, InRange) {
+ // Reset the signal handler to do nothing so that it doesn't freak out
+ // the test runner when we fire an alarm.
+ struct sigaction sa = {};
+ sa.sa_sigaction = sigact_handler;
+ sigfillset(&sa.sa_mask);
+ sa.sa_flags = SA_SIGINFO;
+ ASSERT_THAT(sigaction(SIGSEGV, &sa, nullptr), SyscallSucceeds());
+
+ Fault();
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/fchdir.cc b/test/syscalls/linux/fchdir.cc
new file mode 100644
index 000000000..2b13e36c3
--- /dev/null
+++ b/test/syscalls/linux/fchdir.cc
@@ -0,0 +1,77 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include "gtest/gtest.h"
+#include "test/util/capability_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(FchdirTest, Success) {
+ auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ int fd;
+ ASSERT_THAT(fd = open(temp_dir.path().c_str(), O_DIRECTORY | O_RDONLY),
+ SyscallSucceeds());
+
+ EXPECT_THAT(fchdir(fd), SyscallSucceeds());
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+ // Change CWD to a permanent location as temp dirs will be cleaned up.
+ EXPECT_THAT(chdir("/"), SyscallSucceeds());
+}
+
+TEST(FchdirTest, InvalidFD) {
+ EXPECT_THAT(fchdir(-1), SyscallFailsWithErrno(EBADF));
+}
+
+TEST(FchdirTest, PermissionDenied) {
+ // Drop capabilities that allow us to override directory permissions.
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+ auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(
+ TempPath::CreateDirWith(GetAbsoluteTestTmpdir(), 0666 /* mode */));
+
+ int fd;
+ ASSERT_THAT(fd = open(temp_dir.path().c_str(), O_DIRECTORY | O_RDONLY),
+ SyscallSucceeds());
+
+ EXPECT_THAT(fchdir(fd), SyscallFailsWithErrno(EACCES));
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+}
+
+TEST(FchdirTest, NotDir) {
+ auto temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+ int fd;
+ ASSERT_THAT(fd = open(temp_file.path().c_str(), O_CREAT | O_RDONLY, 0777),
+ SyscallSucceeds());
+
+ EXPECT_THAT(fchdir(fd), SyscallFailsWithErrno(ENOTDIR));
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/fcntl.cc b/test/syscalls/linux/fcntl.cc
new file mode 100644
index 000000000..355334bfa
--- /dev/null
+++ b/test/syscalls/linux/fcntl.cc
@@ -0,0 +1,978 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/eventfd.h>
+#include <syscall.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "absl/base/macros.h"
+#include "absl/base/port.h"
+#include "absl/memory/memory.h"
+#include "absl/strings/str_cat.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/cleanup.h"
+#include "test/util/multiprocess_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+#include "test/util/timer_util.h"
+
+DEFINE_string(child_setlock_on, "",
+ "Contains the path to try to set a file lock on.");
+DEFINE_bool(child_setlock_write, false,
+ "Whether to set a writable lock (otherwise readable)");
+DEFINE_bool(blocking, false,
+ "Whether to set a blocking lock (otherwise non-blocking).");
+DEFINE_bool(retry_eintr, false, "Whether to retry in the subprocess on EINTR.");
+DEFINE_uint64(child_setlock_start, 0, "The value of struct flock start");
+DEFINE_uint64(child_setlock_len, 0, "The value of struct flock len");
+DEFINE_int32(socket_fd, -1,
+ "A socket to use for communicating more state back "
+ "to the parent.");
+
+namespace gvisor {
+namespace testing {
+
+// O_LARGEFILE as defined by Linux. glibc tries to be clever by setting it to 0
+// because "it isn't needed", even though Linux can return it via F_GETFL.
+constexpr int kOLargeFile = 00100000;
+
+class FcntlLockTest : public ::testing::Test {
+ public:
+ void SetUp() override {
+ // Let's make a socket pair.
+ ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, fds_), SyscallSucceeds());
+ }
+
+ void TearDown() override {
+ EXPECT_THAT(close(fds_[0]), SyscallSucceeds());
+ EXPECT_THAT(close(fds_[1]), SyscallSucceeds());
+ }
+
+ int64_t GetSubprocessFcntlTimeInUsec() {
+ int64_t ret = 0;
+ EXPECT_THAT(ReadFd(fds_[0], reinterpret_cast<void*>(&ret), sizeof(ret)),
+ SyscallSucceedsWithValue(sizeof(ret)));
+ return ret;
+ }
+
+ // The first fd will remain with the process creating the subprocess
+ // and the second will go to the subprocess.
+ int fds_[2] = {};
+};
+
+namespace {
+
+PosixErrorOr<Cleanup> SubprocessLock(std::string const& path, bool for_write,
+ bool blocking, bool retry_eintr, int fd,
+ off_t start, off_t length, pid_t* child) {
+ std::vector<std::string> args = {
+ "/proc/self/exe", "--child_setlock_on", path,
+ "--child_setlock_start", absl::StrCat(start), "--child_setlock_len",
+ absl::StrCat(length), "--socket_fd", absl::StrCat(fd)};
+
+ if (for_write) {
+ args.push_back("--child_setlock_write");
+ }
+
+ if (blocking) {
+ args.push_back("--blocking");
+ }
+
+ if (retry_eintr) {
+ args.push_back("--retry_eintr");
+ }
+
+ int execve_errno = 0;
+ ASSIGN_OR_RETURN_ERRNO(
+ auto cleanup,
+ ForkAndExec("/proc/self/exe", ExecveArray(args.begin(), args.end()), {},
+ nullptr, child, &execve_errno));
+
+ if (execve_errno != 0) {
+ return PosixError(execve_errno, "execve");
+ }
+
+ return std::move(cleanup);
+}
+
+PosixErrorOr<FileDescriptor> Eventfd(int count, int flags) {
+ int efd = eventfd(count, flags);
+ if (efd < 0) {
+ return PosixError(errno, "Eventfd");
+ }
+ return FileDescriptor(efd);
+}
+
+TEST(FcntlTest, SetCloExec) {
+ // Open an eventfd file descriptor with FD_CLOEXEC descriptor flag not set.
+ FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Eventfd(0, 0));
+ ASSERT_THAT(fcntl(fd.get(), F_GETFD), SyscallSucceedsWithValue(0));
+
+ // Set the FD_CLOEXEC flag.
+ ASSERT_THAT(fcntl(fd.get(), F_SETFD, FD_CLOEXEC), SyscallSucceeds());
+ ASSERT_THAT(fcntl(fd.get(), F_GETFD), SyscallSucceedsWithValue(FD_CLOEXEC));
+}
+
+TEST(FcntlTest, ClearCloExec) {
+ // Open an eventfd file descriptor with FD_CLOEXEC descriptor flag set.
+ FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Eventfd(0, EFD_CLOEXEC));
+ ASSERT_THAT(fcntl(fd.get(), F_GETFD), SyscallSucceedsWithValue(FD_CLOEXEC));
+
+ // Clear the FD_CLOEXEC flag.
+ ASSERT_THAT(fcntl(fd.get(), F_SETFD, 0), SyscallSucceeds());
+ ASSERT_THAT(fcntl(fd.get(), F_GETFD), SyscallSucceedsWithValue(0));
+}
+
+TEST(FcntlTest, IndependentDescriptorFlags) {
+ // Open an eventfd file descriptor with FD_CLOEXEC descriptor flag not set.
+ FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Eventfd(0, 0));
+ ASSERT_THAT(fcntl(fd.get(), F_GETFD), SyscallSucceedsWithValue(0));
+
+ // Duplicate the descriptor. Ensure that it also doesn't have FD_CLOEXEC.
+ FileDescriptor newfd = ASSERT_NO_ERRNO_AND_VALUE(fd.Dup());
+ ASSERT_THAT(fcntl(newfd.get(), F_GETFD), SyscallSucceedsWithValue(0));
+
+ // Set FD_CLOEXEC on the first FD.
+ ASSERT_THAT(fcntl(fd.get(), F_SETFD, FD_CLOEXEC), SyscallSucceeds());
+ ASSERT_THAT(fcntl(fd.get(), F_GETFD), SyscallSucceedsWithValue(FD_CLOEXEC));
+
+ // Ensure that the second FD is unaffected by the change on the first.
+ ASSERT_THAT(fcntl(newfd.get(), F_GETFD), SyscallSucceedsWithValue(0));
+}
+
+// All file description flags passed to open appear in F_GETFL.
+TEST(FcntlTest, GetAllFlags) {
+ TempPath path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ int flags = O_RDWR | O_DIRECT | O_SYNC | O_NONBLOCK | O_APPEND;
+ FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path.path(), flags));
+
+ // Linux forces O_LARGEFILE on all 64-bit kernels and gVisor's is 64-bit.
+ int expected = flags | kOLargeFile;
+
+ int rflags;
+ EXPECT_THAT(rflags = fcntl(fd.get(), F_GETFL), SyscallSucceeds());
+ EXPECT_EQ(rflags, expected);
+}
+
+TEST(FcntlTest, SetFlags) {
+ TempPath path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path.path(), 0));
+
+ int const flags = O_RDWR | O_DIRECT | O_SYNC | O_NONBLOCK | O_APPEND;
+ EXPECT_THAT(fcntl(fd.get(), F_SETFL, flags), SyscallSucceeds());
+
+ // Can't set O_RDWR or O_SYNC.
+ // Linux forces O_LARGEFILE on all 64-bit kernels and gVisor's is 64-bit.
+ int expected = O_DIRECT | O_NONBLOCK | O_APPEND | kOLargeFile;
+
+ int rflags;
+ EXPECT_THAT(rflags = fcntl(fd.get(), F_GETFL), SyscallSucceeds());
+ EXPECT_EQ(rflags, expected);
+}
+
+TEST_F(FcntlLockTest, SetLockBadFd) {
+ struct flock fl;
+ fl.l_type = F_WRLCK;
+ fl.l_whence = SEEK_SET;
+ fl.l_start = 0;
+ // len 0 has a special meaning: lock all bytes despite how
+ // large the file grows.
+ fl.l_len = 0;
+ EXPECT_THAT(fcntl(-1, F_SETLK, &fl), SyscallFailsWithErrno(EBADF));
+}
+
+TEST_F(FcntlLockTest, SetLockPipe) {
+ int fds[2];
+ ASSERT_THAT(pipe(fds), SyscallSucceeds());
+
+ struct flock fl;
+ fl.l_type = F_WRLCK;
+ fl.l_whence = SEEK_SET;
+ fl.l_start = 0;
+ // Same as SetLockBadFd, but doesn't matter, we expect this to fail.
+ fl.l_len = 0;
+ EXPECT_THAT(fcntl(fds[0], F_SETLK, &fl), SyscallFailsWithErrno(EBADF));
+ EXPECT_THAT(close(fds[0]), SyscallSucceeds());
+ EXPECT_THAT(close(fds[1]), SyscallSucceeds());
+}
+
+TEST_F(FcntlLockTest, SetLockDir) {
+ auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_RDONLY, 0666));
+
+ struct flock fl;
+ fl.l_type = F_RDLCK;
+ fl.l_whence = SEEK_SET;
+ fl.l_start = 0;
+ // Same as SetLockBadFd.
+ fl.l_len = 0;
+
+ EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds());
+}
+
+TEST_F(FcntlLockTest, SetLockBadOpenFlagsWrite) {
+ auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY, 0666));
+
+ struct flock fl0;
+ fl0.l_type = F_WRLCK;
+ fl0.l_whence = SEEK_SET;
+ fl0.l_start = 0;
+ // Same as SetLockBadFd.
+ fl0.l_len = 0;
+
+ // Expect that setting a write lock using a read only file descriptor
+ // won't work.
+ EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl0), SyscallFailsWithErrno(EBADF));
+}
+
+TEST_F(FcntlLockTest, SetLockBadOpenFlagsRead) {
+ auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_WRONLY, 0666));
+
+ struct flock fl1;
+ fl1.l_type = F_RDLCK;
+ fl1.l_whence = SEEK_SET;
+ fl1.l_start = 0;
+ // Same as SetLockBadFd.
+ fl1.l_len = 0;
+
+ EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl1), SyscallFailsWithErrno(EBADF));
+}
+
+TEST_F(FcntlLockTest, SetLockUnlockOnNothing) {
+ auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666));
+
+ struct flock fl;
+ fl.l_type = F_UNLCK;
+ fl.l_whence = SEEK_SET;
+ fl.l_start = 0;
+ // Same as SetLockBadFd.
+ fl.l_len = 0;
+
+ EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds());
+}
+
+TEST_F(FcntlLockTest, SetWriteLockSingleProc) {
+ auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ FileDescriptor fd0 =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666));
+
+ struct flock fl;
+ fl.l_type = F_WRLCK;
+ fl.l_whence = SEEK_SET;
+ fl.l_start = 0;
+ // Same as SetLockBadFd.
+ fl.l_len = 0;
+
+ EXPECT_THAT(fcntl(fd0.get(), F_SETLK, &fl), SyscallSucceeds());
+ // Expect to be able to take the same lock on the same fd no problem.
+ EXPECT_THAT(fcntl(fd0.get(), F_SETLK, &fl), SyscallSucceeds());
+
+ FileDescriptor fd1 =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666));
+
+ // Expect to be able to take the same lock from a different fd but for
+ // the same process.
+ EXPECT_THAT(fcntl(fd1.get(), F_SETLK, &fl), SyscallSucceeds());
+}
+
+TEST_F(FcntlLockTest, SetReadLockMultiProc) {
+ auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666));
+
+ struct flock fl;
+ fl.l_type = F_RDLCK;
+ fl.l_whence = SEEK_SET;
+ fl.l_start = 0;
+ // Same as SetLockBadFd.
+ fl.l_len = 0;
+ EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds());
+
+ // spawn a child process to take a read lock on the same file.
+ pid_t child_pid = 0;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+ SubprocessLock(file.path(), false /* write lock */,
+ false /* nonblocking */, false /* no eintr retry */,
+ -1 /* no socket fd */, fl.l_start, fl.l_len, &child_pid));
+
+ int status = 0;
+ ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << "Exited with code: " << status;
+}
+
+TEST_F(FcntlLockTest, SetReadThenWriteLockMultiProc) {
+ auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666));
+
+ struct flock fl;
+ fl.l_type = F_RDLCK;
+ fl.l_whence = SEEK_SET;
+ fl.l_start = 0;
+ // Same as SetLockBadFd.
+ fl.l_len = 0;
+ EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds());
+
+ // Assert that another process trying to lock on the same file will fail
+ // with EAGAIN. It's important that we keep the fd above open so that
+ // that the other process will contend with the lock.
+ pid_t child_pid = 0;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+ SubprocessLock(file.path(), true /* write lock */,
+ false /* nonblocking */, false /* no eintr retry */,
+ -1 /* no socket fd */, fl.l_start, fl.l_len, &child_pid));
+
+ int status = 0;
+ ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == EAGAIN)
+ << "Exited with code: " << status;
+
+ // Close the fd: we want to test that another process can acquire the
+ // lock after this point.
+ fd.reset();
+ // Assert that another process can now acquire the lock.
+
+ child_pid = 0;
+ auto cleanup2 = ASSERT_NO_ERRNO_AND_VALUE(
+ SubprocessLock(file.path(), true /* write lock */,
+ false /* nonblocking */, false /* no eintr retry */,
+ -1 /* no socket fd */, fl.l_start, fl.l_len, &child_pid));
+ ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << "Exited with code: " << status;
+}
+
+TEST_F(FcntlLockTest, SetWriteThenReadLockMultiProc) {
+ auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+ FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666));
+ // Same as SetReadThenWriteLockMultiProc.
+
+ struct flock fl;
+ fl.l_type = F_WRLCK;
+ fl.l_whence = SEEK_SET;
+ fl.l_start = 0;
+ // Same as SetLockBadFd.
+ fl.l_len = 0;
+
+ // Same as SetReadThenWriteLockMultiProc.
+ EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds());
+
+ // Same as SetReadThenWriteLockMultiProc.
+ pid_t child_pid = 0;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+ SubprocessLock(file.path(), false /* write lock */,
+ false /* nonblocking */, false /* no eintr retry */,
+ -1 /* no socket fd */, fl.l_start, fl.l_len, &child_pid));
+
+ int status = 0;
+ ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == EAGAIN)
+ << "Exited with code: " << status;
+
+ // Same as SetReadThenWriteLockMultiProc.
+ fd.reset(); // Close the fd.
+
+ // Same as SetReadThenWriteLockMultiProc.
+ child_pid = 0;
+ auto cleanup2 = ASSERT_NO_ERRNO_AND_VALUE(
+ SubprocessLock(file.path(), false /* write lock */,
+ false /* nonblocking */, false /* no eintr retry */,
+ -1 /* no socket fd */, fl.l_start, fl.l_len, &child_pid));
+ ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << "Exited with code: " << status;
+}
+
+TEST_F(FcntlLockTest, SetWriteLockMultiProc) {
+ auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666));
+ // Same as SetReadThenWriteLockMultiProc.
+
+ struct flock fl;
+ fl.l_type = F_WRLCK;
+ fl.l_whence = SEEK_SET;
+ fl.l_start = 0;
+ // Same as SetLockBadFd.
+ fl.l_len = 0;
+
+ // Same as SetReadWriteLockMultiProc.
+ EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds());
+
+ // Same as SetReadWriteLockMultiProc.
+ pid_t child_pid = 0;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+ SubprocessLock(file.path(), true /* write lock */,
+ false /* nonblocking */, false /* no eintr retry */,
+ -1 /* no socket fd */, fl.l_start, fl.l_len, &child_pid));
+ int status = 0;
+ ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == EAGAIN)
+ << "Exited with code: " << status;
+
+ fd.reset(); // Close the FD.
+ // Same as SetReadWriteLockMultiProc.
+ child_pid = 0;
+ auto cleanup2 = ASSERT_NO_ERRNO_AND_VALUE(
+ SubprocessLock(file.path(), true /* write lock */,
+ false /* nonblocking */, false /* no eintr retry */,
+ -1 /* no socket fd */, fl.l_start, fl.l_len, &child_pid));
+ ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << "Exited with code: " << status;
+}
+
+TEST_F(FcntlLockTest, SetLockIsRegional) {
+ auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666));
+
+ struct flock fl;
+ fl.l_type = F_WRLCK;
+ fl.l_whence = SEEK_SET;
+ fl.l_start = 0;
+ fl.l_len = 4096;
+
+ // Same as SetReadWriteLockMultiProc.
+ EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds());
+
+ // Same as SetReadWriteLockMultiProc.
+ pid_t child_pid = 0;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+ SubprocessLock(file.path(), true /* write lock */,
+ false /* nonblocking */, false /* no eintr retry */,
+ -1 /* no socket fd */, fl.l_len, 0, &child_pid));
+ int status = 0;
+ ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << "Exited with code: " << status;
+}
+
+TEST_F(FcntlLockTest, SetLockUpgradeDowngrade) {
+ auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666));
+
+ struct flock fl;
+ fl.l_type = F_RDLCK;
+ fl.l_whence = SEEK_SET;
+ fl.l_start = 0;
+ // Same as SetLockBadFd.
+ fl.l_len = 0;
+
+ // Same as SetReadWriteLockMultiProc.
+ EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds());
+
+ // Upgrade to a write lock. This will prevent anyone else from taking
+ // the lock.
+ fl.l_type = F_WRLCK;
+ EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds());
+
+ // Same as SetReadWriteLockMultiProc.,
+ pid_t child_pid = 0;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+ SubprocessLock(file.path(), false /* write lock */,
+ false /* nonblocking */, false /* no eintr retry */,
+ -1 /* no socket fd */, fl.l_start, fl.l_len, &child_pid));
+
+ int status = 0;
+ ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == EAGAIN)
+ << "Exited with code: " << status;
+
+ // Downgrade back to a read lock.
+ fl.l_type = F_RDLCK;
+ EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds());
+
+ // Do the same stint as before, but this time it should succeed.
+ child_pid = 0;
+ auto cleanup2 = ASSERT_NO_ERRNO_AND_VALUE(
+ SubprocessLock(file.path(), false /* write lock */,
+ false /* nonblocking */, false /* no eintr retry */,
+ -1 /* no socket fd */, fl.l_start, fl.l_len, &child_pid));
+
+ ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << "Exited with code: " << status;
+}
+
+TEST_F(FcntlLockTest, SetLockDroppedOnClose) {
+ auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666));
+
+ // While somewhat surprising, obtaining another fd to the same file and
+ // then closing it in this process drops *all* locks.
+ FileDescriptor other_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666));
+ // Same as SetReadThenWriteLockMultiProc.
+
+ struct flock fl;
+ fl.l_type = F_WRLCK;
+ fl.l_whence = SEEK_SET;
+ fl.l_start = 0;
+ // Same as SetLockBadFd.
+ fl.l_len = 0;
+
+ // Same as SetReadWriteLockMultiProc.
+ EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds());
+
+ other_fd.reset(); // Close.
+
+ // Expect to be able to get the lock, given that the close above dropped it.
+ pid_t child_pid = 0;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+ SubprocessLock(file.path(), true /* write lock */,
+ false /* nonblocking */, false /* no eintr retry */,
+ -1 /* no socket fd */, fl.l_start, fl.l_len, &child_pid));
+
+ int status = 0;
+ ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << "Exited with code: " << status;
+}
+
+TEST_F(FcntlLockTest, SetLockUnlock) {
+ auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666));
+
+ // Setup two regional locks with different permissions.
+ struct flock fl0;
+ fl0.l_type = F_WRLCK;
+ fl0.l_whence = SEEK_SET;
+ fl0.l_start = 0;
+ fl0.l_len = 4096;
+
+ struct flock fl1;
+ fl1.l_type = F_RDLCK;
+ fl1.l_whence = SEEK_SET;
+ fl1.l_start = 4096;
+ // Same as SetLockBadFd.
+ fl1.l_len = 0;
+
+ // Set both region locks.
+ EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl0), SyscallSucceeds());
+ EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl1), SyscallSucceeds());
+
+ // Another process should fail to take a read lock on the entire file
+ // due to the regional write lock.
+ pid_t child_pid = 0;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(SubprocessLock(
+ file.path(), false /* write lock */, false /* nonblocking */,
+ false /* no eintr retry */, -1 /* no socket fd */, 0, 0, &child_pid));
+
+ int status = 0;
+ ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == EAGAIN)
+ << "Exited with code: " << status;
+
+ // Then only unlock the writable one. This should ensure that other
+ // processes can take any read lock that it wants.
+ fl0.l_type = F_UNLCK;
+ EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl0), SyscallSucceeds());
+
+ // Another process should now succeed to get a read lock on the entire file.
+ child_pid = 0;
+ auto cleanup2 = ASSERT_NO_ERRNO_AND_VALUE(SubprocessLock(
+ file.path(), false /* write lock */, false /* nonblocking */,
+ false /* no eintr retry */, -1 /* no socket fd */, 0, 0, &child_pid));
+ ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << "Exited with code: " << status;
+}
+
+TEST_F(FcntlLockTest, SetLockAcrossRename) {
+ auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666));
+
+ // Setup two regional locks with different permissions.
+ struct flock fl;
+ fl.l_type = F_WRLCK;
+ fl.l_whence = SEEK_SET;
+ fl.l_start = 0;
+ // Same as SetLockBadFd.
+ fl.l_len = 0;
+
+ // Set the region lock.
+ EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds());
+
+ // Rename the file to someplace nearby.
+ std::string const newpath = NewTempAbsPath();
+ EXPECT_THAT(rename(file.path().c_str(), newpath.c_str()), SyscallSucceeds());
+
+ // Another process should fail to take a read lock on the renamed file
+ // since we still have an open handle to the inode.
+ pid_t child_pid = 0;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+ SubprocessLock(newpath, false /* write lock */, false /* nonblocking */,
+ false /* no eintr retry */, -1 /* no socket fd */,
+ fl.l_start, fl.l_len, &child_pid));
+
+ int status = 0;
+ ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == EAGAIN)
+ << "Exited with code: " << status;
+}
+
+// NOTE: The blocking tests below aren't perfect. It's hard to assert exactly
+// what the kernel did while handling a syscall. These tests are timing based
+// because there really isn't any other reasonable way to assert that correct
+// blocking behavior happened.
+
+// This test will verify that blocking works as expected when another process
+// holds a write lock when obtaining a write lock. This test will hold the lock
+// for some amount of time and then wait for the second process to send over the
+// socket_fd the amount of time it was blocked for before the lock succeeded.
+TEST_F(FcntlLockTest, SetWriteLockThenBlockingWriteLock) {
+ auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666));
+
+ struct flock fl;
+ fl.l_type = F_WRLCK;
+ fl.l_whence = SEEK_SET;
+ fl.l_start = 0;
+ fl.l_len = 0;
+
+ // Take the write lock.
+ ASSERT_THAT(fcntl(fd.get(), F_SETLKW, &fl), SyscallSucceeds());
+
+ // Attempt to take the read lock in a sub process. This will immediately block
+ // so we will release our lock after some amount of time and then assert the
+ // amount of time the other process was blocked for.
+ pid_t child_pid = 0;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(SubprocessLock(
+ file.path(), true /* write lock */, true /* Blocking Lock */,
+ true /* Retry on EINTR */, fds_[1] /* Socket fd for timing information */,
+ fl.l_start, fl.l_len, &child_pid));
+
+ // We will wait kHoldLockForSec before we release our lock allowing the
+ // subprocess to obtain it.
+ constexpr absl::Duration kHoldLockFor = absl::Seconds(5);
+ const int64_t kMinBlockTimeUsec = absl::ToInt64Microseconds(absl::Seconds(1));
+
+ absl::SleepFor(kHoldLockFor);
+
+ // Unlock our write lock.
+ fl.l_type = F_UNLCK;
+ ASSERT_THAT(fcntl(fd.get(), F_SETLKW, &fl), SyscallSucceeds());
+
+ // Read the blocked time from the subprocess socket.
+ int64_t subprocess_blocked_time_usec = GetSubprocessFcntlTimeInUsec();
+
+ // We must have been waiting at least kMinBlockTime.
+ EXPECT_GT(subprocess_blocked_time_usec, kMinBlockTimeUsec);
+
+ // The FCNTL write lock must always succeed as it will simply block until it
+ // can obtain the lock.
+ int status = 0;
+ ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << "Exited with code: " << status;
+}
+
+// This test will veirfy that blocking works as expected when another process
+// holds a read lock when obtaining a write lock. This test will hold the lock
+// for some amount of time and then wait for the second process to send over the
+// socket_fd the amount of time it was blocked for before the lock succeeded.
+TEST_F(FcntlLockTest, SetReadLockThenBlockingWriteLock) {
+ auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666));
+
+ struct flock fl;
+ fl.l_type = F_RDLCK;
+ fl.l_whence = SEEK_SET;
+ fl.l_start = 0;
+ fl.l_len = 0;
+
+ // Take the write lock.
+ ASSERT_THAT(fcntl(fd.get(), F_SETLKW, &fl), SyscallSucceeds());
+
+ // Attempt to take the read lock in a sub process. This will immediately block
+ // so we will release our lock after some amount of time and then assert the
+ // amount of time the other process was blocked for.
+ pid_t child_pid = 0;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(SubprocessLock(
+ file.path(), true /* write lock */, true /* Blocking Lock */,
+ true /* Retry on EINTR */, fds_[1] /* Socket fd for timing information */,
+ fl.l_start, fl.l_len, &child_pid));
+
+ // We will wait kHoldLockForSec before we release our lock allowing the
+ // subprocess to obtain it.
+ constexpr absl::Duration kHoldLockFor = absl::Seconds(5);
+
+ const int64_t kMinBlockTimeUsec = absl::ToInt64Microseconds(absl::Seconds(1));
+
+ absl::SleepFor(kHoldLockFor);
+
+ // Unlock our READ lock.
+ fl.l_type = F_UNLCK;
+ ASSERT_THAT(fcntl(fd.get(), F_SETLKW, &fl), SyscallSucceeds());
+
+ // Read the blocked time from the subprocess socket.
+ int64_t subprocess_blocked_time_usec = GetSubprocessFcntlTimeInUsec();
+
+ // We must have been waiting at least kMinBlockTime.
+ EXPECT_GT(subprocess_blocked_time_usec, kMinBlockTimeUsec);
+
+ // The FCNTL write lock must always succeed as it will simply block until it
+ // can obtain the lock.
+ int status = 0;
+ ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << "Exited with code: " << status;
+}
+
+// This test will veirfy that blocking works as expected when another process
+// holds a write lock when obtaining a read lock. This test will hold the lock
+// for some amount of time and then wait for the second process to send over the
+// socket_fd the amount of time it was blocked for before the lock succeeded.
+TEST_F(FcntlLockTest, SetWriteLockThenBlockingReadLock) {
+ auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666));
+
+ struct flock fl;
+ fl.l_type = F_WRLCK;
+ fl.l_whence = SEEK_SET;
+ fl.l_start = 0;
+ fl.l_len = 0;
+
+ // Take the write lock.
+ ASSERT_THAT(fcntl(fd.get(), F_SETLKW, &fl), SyscallSucceeds());
+
+ // Attempt to take the read lock in a sub process. This will immediately block
+ // so we will release our lock after some amount of time and then assert the
+ // amount of time the other process was blocked for.
+ pid_t child_pid = 0;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(SubprocessLock(
+ file.path(), false /* read lock */, true /* Blocking Lock */,
+ true /* Retry on EINTR */, fds_[1] /* Socket fd for timing information */,
+ fl.l_start, fl.l_len, &child_pid));
+
+ // We will wait kHoldLockForSec before we release our lock allowing the
+ // subprocess to obtain it.
+ constexpr absl::Duration kHoldLockFor = absl::Seconds(5);
+
+ const int64_t kMinBlockTimeUsec = absl::ToInt64Microseconds(absl::Seconds(1));
+
+ absl::SleepFor(kHoldLockFor);
+
+ // Unlock our write lock.
+ fl.l_type = F_UNLCK;
+ ASSERT_THAT(fcntl(fd.get(), F_SETLKW, &fl), SyscallSucceeds());
+
+ // Read the blocked time from the subprocess socket.
+ int64_t subprocess_blocked_time_usec = GetSubprocessFcntlTimeInUsec();
+
+ // We must have been waiting at least kMinBlockTime.
+ EXPECT_GT(subprocess_blocked_time_usec, kMinBlockTimeUsec);
+
+ // The FCNTL read lock must always succeed as it will simply block until it
+ // can obtain the lock.
+ int status = 0;
+ ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << "Exited with code: " << status;
+}
+
+// This test will verify that when one process only holds a read lock that
+// another will not block while obtaining a read lock when F_SETLKW is used.
+TEST_F(FcntlLockTest, SetReadLockThenBlockingReadLock) {
+ auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0666));
+
+ struct flock fl;
+ fl.l_type = F_RDLCK;
+ fl.l_whence = SEEK_SET;
+ fl.l_start = 0;
+ fl.l_len = 0;
+
+ // Take the READ lock.
+ ASSERT_THAT(fcntl(fd.get(), F_SETLKW, &fl), SyscallSucceeds());
+
+ // Attempt to take the read lock in a sub process. Since multiple processes
+ // can hold a read lock this should immediately return without blocking
+ // even though we used F_SETLKW in the subprocess.
+ pid_t child_pid = 0;
+ auto sp = ASSERT_NO_ERRNO_AND_VALUE(SubprocessLock(
+ file.path(), false /* read lock */, true /* Blocking Lock */,
+ true /* Retry on EINTR */, -1 /* No fd, should not block */, fl.l_start,
+ fl.l_len, &child_pid));
+
+ // We never release the lock and the subprocess should still obtain it without
+ // blocking for any period of time.
+ int status = 0;
+ ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << "Exited with code: " << status;
+}
+
+TEST(FcntlTest, GetO_ASYNC) {
+ FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
+ Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0));
+
+ int flag_fl = -1;
+ ASSERT_THAT(flag_fl = fcntl(s.get(), F_GETFL), SyscallSucceeds());
+ EXPECT_EQ(flag_fl & O_ASYNC, 0);
+
+ int flag_fd = -1;
+ ASSERT_THAT(flag_fd = fcntl(s.get(), F_GETFD), SyscallSucceeds());
+ EXPECT_EQ(flag_fd & O_ASYNC, 0);
+}
+
+TEST(FcntlTest, SetFlO_ASYNC) {
+ FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
+ Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0));
+
+ int before_fl = -1;
+ ASSERT_THAT(before_fl = fcntl(s.get(), F_GETFL), SyscallSucceeds());
+
+ int before_fd = -1;
+ ASSERT_THAT(before_fd = fcntl(s.get(), F_GETFD), SyscallSucceeds());
+
+ ASSERT_THAT(fcntl(s.get(), F_SETFL, before_fl | O_ASYNC), SyscallSucceeds());
+
+ int after_fl = -1;
+ ASSERT_THAT(after_fl = fcntl(s.get(), F_GETFL), SyscallSucceeds());
+ EXPECT_EQ(after_fl, before_fl | O_ASYNC);
+
+ int after_fd = -1;
+ ASSERT_THAT(after_fd = fcntl(s.get(), F_GETFD), SyscallSucceeds());
+ EXPECT_EQ(after_fd, before_fd);
+}
+
+TEST(FcntlTest, SetFdO_ASYNC) {
+ FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
+ Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0));
+
+ int before_fl = -1;
+ ASSERT_THAT(before_fl = fcntl(s.get(), F_GETFL), SyscallSucceeds());
+
+ int before_fd = -1;
+ ASSERT_THAT(before_fd = fcntl(s.get(), F_GETFD), SyscallSucceeds());
+
+ ASSERT_THAT(fcntl(s.get(), F_SETFD, before_fd | O_ASYNC), SyscallSucceeds());
+
+ int after_fl = -1;
+ ASSERT_THAT(after_fl = fcntl(s.get(), F_GETFL), SyscallSucceeds());
+ EXPECT_EQ(after_fl, before_fl);
+
+ int after_fd = -1;
+ ASSERT_THAT(after_fd = fcntl(s.get(), F_GETFD), SyscallSucceeds());
+ EXPECT_EQ(after_fd, before_fd);
+}
+
+TEST(FcntlTest, DupAfterO_ASYNC) {
+ FileDescriptor s1 = ASSERT_NO_ERRNO_AND_VALUE(
+ Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0));
+
+ int before = -1;
+ ASSERT_THAT(before = fcntl(s1.get(), F_GETFL), SyscallSucceeds());
+
+ ASSERT_THAT(fcntl(s1.get(), F_SETFL, before | O_ASYNC), SyscallSucceeds());
+
+ FileDescriptor fd2 = ASSERT_NO_ERRNO_AND_VALUE(s1.Dup());
+
+ int after = -1;
+ ASSERT_THAT(after = fcntl(fd2.get(), F_GETFL), SyscallSucceeds());
+ EXPECT_EQ(after & O_ASYNC, O_ASYNC);
+}
+
+TEST(FcntlTest, GetOwn) {
+ FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
+ Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0));
+
+ ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN),
+ SyscallSucceedsWithValue(0));
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
+
+int main(int argc, char** argv) {
+ gvisor::testing::TestInit(&argc, &argv);
+
+ if (!FLAGS_child_setlock_on.empty()) {
+ int socket_fd = FLAGS_socket_fd;
+ int fd = open(FLAGS_child_setlock_on.c_str(), O_RDWR, 0666);
+ if (fd == -1 && errno != 0) {
+ int err = errno;
+ std::cerr << "CHILD open " << FLAGS_child_setlock_on << " failed " << err
+ << std::endl;
+ exit(err);
+ }
+
+ struct flock fl;
+ if (FLAGS_child_setlock_write) {
+ fl.l_type = F_WRLCK;
+ } else {
+ fl.l_type = F_RDLCK;
+ }
+ fl.l_whence = SEEK_SET;
+ fl.l_start = FLAGS_child_setlock_start;
+ fl.l_len = FLAGS_child_setlock_len;
+
+ // Test the fcntl, no need to log, the error is unambiguously
+ // from fcntl at this point.
+ int err = 0;
+ int ret = 0;
+
+ gvisor::testing::MonotonicTimer timer;
+ timer.Start();
+ do {
+ ret = fcntl(fd, FLAGS_blocking ? F_SETLKW : F_SETLK, &fl);
+ } while (FLAGS_retry_eintr && ret == -1 && errno == EINTR);
+ auto usec = absl::ToInt64Microseconds(timer.Duration());
+
+ if (ret == -1 && errno != 0) {
+ err = errno;
+ }
+
+ // If there is a socket fd let's send back the time in microseconds it took
+ // to execute this syscall.
+ if (socket_fd != -1) {
+ gvisor::testing::WriteFd(socket_fd, reinterpret_cast<void*>(&usec),
+ sizeof(usec));
+ close(socket_fd);
+ }
+
+ close(fd);
+ exit(err);
+ }
+
+ return RUN_ALL_TESTS();
+}
diff --git a/test/syscalls/linux/file_base.h b/test/syscalls/linux/file_base.h
new file mode 100644
index 000000000..19c9a5053
--- /dev/null
+++ b/test/syscalls/linux/file_base.h
@@ -0,0 +1,206 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_FILE_BASE_H_
+#define GVISOR_TEST_SYSCALLS_FILE_BASE_H_
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <netinet/in.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <unistd.h>
+#include <cstring>
+#include <string>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "absl/strings/string_view.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/posix_error.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+class FileTest : public ::testing::Test {
+ public:
+ void SetUp() override {
+ test_pipe_[0] = -1;
+ test_pipe_[1] = -1;
+
+ test_file_name_ = NewTempAbsPath();
+ test_file_fd_ = ASSERT_NO_ERRNO_AND_VALUE(
+ Open(test_file_name_, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR));
+
+ // FIXME: enable when mknod syscall is supported.
+ // test_fifo_name_ = NewTempAbsPath();
+ // ASSERT_THAT(mknod(test_fifo_name_.c_str()), S_IFIFO|0644, 0,
+ // SyscallSucceeds());
+ // ASSERT_THAT(test_fifo_[1] = open(test_fifo_name_.c_str(),
+ // O_WRONLY),
+ // SyscallSucceeds());
+ // ASSERT_THAT(test_fifo_[0] = open(test_fifo_name_.c_str(),
+ // O_RDONLY),
+ // SyscallSucceeds());
+
+ ASSERT_THAT(pipe(test_pipe_), SyscallSucceeds());
+ ASSERT_THAT(fcntl(test_pipe_[0], F_SETFL, O_NONBLOCK), SyscallSucceeds());
+ }
+
+ // CloseFile will allow the test to manually close the file descriptor.
+ void CloseFile() { test_file_fd_.reset(); }
+
+ // UnlinkFile will allow the test to manually unlink the file.
+ void UnlinkFile() {
+ if (!test_file_name_.empty()) {
+ EXPECT_THAT(unlink(test_file_name_.c_str()), SyscallSucceeds());
+ test_file_name_.clear();
+ }
+ }
+
+ // ClosePipes will allow the test to manually close the pipes.
+ void ClosePipes() {
+ if (test_pipe_[0] > 0) {
+ EXPECT_THAT(close(test_pipe_[0]), SyscallSucceeds());
+ }
+
+ if (test_pipe_[1] > 0) {
+ EXPECT_THAT(close(test_pipe_[1]), SyscallSucceeds());
+ }
+
+ test_pipe_[0] = -1;
+ test_pipe_[1] = -1;
+ }
+
+ void TearDown() override {
+ CloseFile();
+ UnlinkFile();
+ ClosePipes();
+
+ // FIXME: enable when mknod syscall is supported.
+ // close(test_fifo_[0]);
+ // close(test_fifo_[1]);
+ // unlink(test_fifo_name_.c_str());
+ }
+
+ std::string test_file_name_;
+ std::string test_fifo_name_;
+ FileDescriptor test_file_fd_;
+
+ int test_fifo_[2];
+ int test_pipe_[2];
+};
+
+class SocketTest : public ::testing::Test {
+ public:
+ void SetUp() override {
+ test_unix_stream_socket_[0] = -1;
+ test_unix_stream_socket_[1] = -1;
+ test_unix_dgram_socket_[0] = -1;
+ test_unix_dgram_socket_[1] = -1;
+ test_unix_seqpacket_socket_[0] = -1;
+ test_unix_seqpacket_socket_[1] = -1;
+ test_tcp_socket_[0] = -1;
+ test_tcp_socket_[1] = -1;
+
+ ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, test_unix_stream_socket_),
+ SyscallSucceeds());
+ ASSERT_THAT(fcntl(test_unix_stream_socket_[0], F_SETFL, O_NONBLOCK),
+ SyscallSucceeds());
+ ASSERT_THAT(socketpair(AF_UNIX, SOCK_DGRAM, 0, test_unix_dgram_socket_),
+ SyscallSucceeds());
+ ASSERT_THAT(fcntl(test_unix_dgram_socket_[0], F_SETFL, O_NONBLOCK),
+ SyscallSucceeds());
+ ASSERT_THAT(
+ socketpair(AF_UNIX, SOCK_SEQPACKET, 0, test_unix_seqpacket_socket_),
+ SyscallSucceeds());
+ ASSERT_THAT(fcntl(test_unix_seqpacket_socket_[0], F_SETFL, O_NONBLOCK),
+ SyscallSucceeds());
+ }
+
+ void TearDown() override {
+ close(test_unix_stream_socket_[0]);
+ close(test_unix_stream_socket_[1]);
+
+ close(test_unix_dgram_socket_[0]);
+ close(test_unix_dgram_socket_[1]);
+
+ close(test_unix_seqpacket_socket_[0]);
+ close(test_unix_seqpacket_socket_[1]);
+
+ close(test_tcp_socket_[0]);
+ close(test_tcp_socket_[1]);
+ }
+
+ int test_unix_stream_socket_[2];
+ int test_unix_dgram_socket_[2];
+ int test_unix_seqpacket_socket_[2];
+ int test_tcp_socket_[2];
+};
+
+// MatchesStringLength checks that a tuple argument of (struct iovec *, int)
+// corresponding to an iovec array and its length, contains data that matches
+// the std::string length strlen.
+MATCHER_P(MatchesStringLength, strlen, "") {
+ struct iovec* iovs = arg.first;
+ int niov = arg.second;
+ int offset = 0;
+ for (int i = 0; i < niov; i++) {
+ offset += iovs[i].iov_len;
+ }
+ if (offset != static_cast<int>(strlen)) {
+ *result_listener << offset;
+ return false;
+ }
+ return true;
+}
+
+// MatchesStringValue checks that a tuple argument of (struct iovec *, int)
+// corresponding to an iovec array and its length, contains data that matches
+// the std::string value str.
+MATCHER_P(MatchesStringValue, str, "") {
+ struct iovec* iovs = arg.first;
+ int len = strlen(str);
+ int niov = arg.second;
+ int offset = 0;
+ for (int i = 0; i < niov; i++) {
+ struct iovec iov = iovs[i];
+ if (len < offset) {
+ *result_listener << "strlen " << len << " < offset " << offset;
+ return false;
+ }
+ if (strncmp(static_cast<char*>(iov.iov_base), &str[offset], iov.iov_len)) {
+ absl::string_view iovec_string(static_cast<char*>(iov.iov_base),
+ iov.iov_len);
+ *result_listener << iovec_string << " @offset " << offset;
+ return false;
+ }
+ offset += iov.iov_len;
+ }
+ return true;
+}
+
+} // namespace testing
+} // namespace gvisor
+
+#endif // GVISOR_TEST_SYSCALLS_FILE_BASE_H_
diff --git a/test/syscalls/linux/flock.cc b/test/syscalls/linux/flock.cc
new file mode 100644
index 000000000..fb93c8034
--- /dev/null
+++ b/test/syscalls/linux/flock.cc
@@ -0,0 +1,588 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <sys/file.h>
+#include <string>
+
+#include "gtest/gtest.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/syscalls/linux/file_base.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+#include "test/util/timer_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+class FlockTest : public FileTest {};
+
+TEST_F(FlockTest, BadFD) {
+ // EBADF: fd is not an open file descriptor.
+ ASSERT_THAT(flock(-1, 0), SyscallFailsWithErrno(EBADF));
+}
+
+TEST_F(FlockTest, InvalidOpCombinations) {
+ // The operation cannot be both exclusive and shared.
+ EXPECT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_SH | LOCK_NB),
+ SyscallFailsWithErrno(EINVAL));
+
+ // Locking and Unlocking doesn't make sense.
+ EXPECT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_UN | LOCK_NB),
+ SyscallFailsWithErrno(EINVAL));
+ EXPECT_THAT(flock(test_file_fd_.get(), LOCK_SH | LOCK_UN | LOCK_NB),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_F(FlockTest, NoOperationSpecified) {
+ // Not specifying an operation is invalid.
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_NB),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(FlockTestNoFixture, FlockSupportsPipes) {
+ int fds[2];
+ ASSERT_THAT(pipe(fds), SyscallSucceeds());
+
+ EXPECT_THAT(flock(fds[0], LOCK_EX | LOCK_NB), SyscallSucceeds());
+ EXPECT_THAT(close(fds[0]), SyscallSucceeds());
+ EXPECT_THAT(close(fds[1]), SyscallSucceeds());
+}
+
+TEST_F(FlockTest, TestSimpleExLock) {
+ // Test that we can obtain an exclusive lock (no other holders)
+ // and that we can unlock it.
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_NB),
+ SyscallSucceedsWithValue(0));
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(FlockTest, TestSimpleShLock) {
+ // Test that we can obtain a shared lock (no other holders)
+ // and that we can unlock it.
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_SH | LOCK_NB),
+ SyscallSucceedsWithValue(0));
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(FlockTest, TestLockableAnyMode) {
+ // flock(2): A shared or exclusive lock can be placed on a file
+ // regardless of the mode in which the file was opened.
+ const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(
+ Open(test_file_name_, O_RDONLY)); // open read only to test
+
+ // Mode shouldn't prevent us from taking an exclusive lock.
+ ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), SyscallSucceedsWithValue(0));
+
+ // Unlock
+ ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(FlockTest, TestUnlockWithNoHolders) {
+ // Test that unlocking when no one holds a lock succeeeds.
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(FlockTest, TestRepeatedExLockingBySameHolder) {
+ // Test that repeated locking by the same holder for the
+ // same type of lock works correctly.
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_NB | LOCK_EX),
+ SyscallSucceedsWithValue(0));
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_NB | LOCK_EX),
+ SyscallSucceedsWithValue(0));
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(FlockTest, TestRepeatedExLockingSingleUnlock) {
+ // Test that repeated locking by the same holder for the
+ // same type of lock works correctly and that a single unlock is required.
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_NB | LOCK_EX),
+ SyscallSucceedsWithValue(0));
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_NB | LOCK_EX),
+ SyscallSucceedsWithValue(0));
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDONLY));
+
+ // Should be unlocked at this point
+ ASSERT_THAT(flock(fd.get(), LOCK_NB | LOCK_EX), SyscallSucceedsWithValue(0));
+
+ ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(FlockTest, TestRepeatedShLockingBySameHolder) {
+ // Test that repeated locking by the same holder for the
+ // same type of lock works correctly.
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_NB | LOCK_SH),
+ SyscallSucceedsWithValue(0));
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_NB | LOCK_SH),
+ SyscallSucceedsWithValue(0));
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(FlockTest, TestSingleHolderUpgrade) {
+ // Test that a shared lock is upgradable when no one else holds a lock.
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_NB | LOCK_SH),
+ SyscallSucceedsWithValue(0));
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_NB | LOCK_EX),
+ SyscallSucceedsWithValue(0));
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(FlockTest, TestSingleHolderDowngrade) {
+ // Test single holder lock downgrade case.
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_NB),
+ SyscallSucceedsWithValue(0));
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_SH | LOCK_NB),
+ SyscallSucceedsWithValue(0));
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(FlockTest, TestMultipleShared) {
+ // This is a simple test to verify that multiple independent shared
+ // locks will be granted.
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_SH | LOCK_NB),
+ SyscallSucceedsWithValue(0));
+
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR));
+
+ // A shared lock should be granted as there only exists other shared locks.
+ ASSERT_THAT(flock(fd.get(), LOCK_SH | LOCK_NB), SyscallSucceedsWithValue(0));
+
+ // Unlock both.
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+ ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
+/*
+ * flock(2): If a process uses open(2) (or similar) to obtain more than one
+ * descriptor for the same file, these descriptors are treated
+ * independently by flock(). An attempt to lock the file using one of
+ * these file descriptors may be denied by a lock that the calling process
+ * has already placed via another descriptor.
+ */
+TEST_F(FlockTest, TestMultipleHolderSharedExclusive) {
+ // This test will verify that an exclusive lock will not be granted
+ // while a shared is held.
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_SH | LOCK_NB),
+ SyscallSucceedsWithValue(0));
+
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR));
+
+ // Verify We're unable to get an exlcusive lock via the second FD.
+ // because someone is holding a shared lock.
+ ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB),
+ SyscallFailsWithErrno(EWOULDBLOCK));
+
+ // Unlock
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(FlockTest, TestSharedLockFailExclusiveHolder) {
+ // This test will verify that a shared lock is denied while
+ // someone holds an exclusive lock.
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_NB),
+ SyscallSucceedsWithValue(0));
+
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR));
+
+ // Verify we're unable to get an shared lock via the second FD.
+ // because someone is holding an exclusive lock.
+ ASSERT_THAT(flock(fd.get(), LOCK_SH | LOCK_NB),
+ SyscallFailsWithErrno(EWOULDBLOCK));
+
+ // Unlock
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(FlockTest, TestExclusiveLockFailExclusiveHolder) {
+ // This test will verify that an exclusive lock is denied while
+ // someone already holds an exclsuive lock.
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_NB),
+ SyscallSucceedsWithValue(0));
+
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR));
+
+ // Verify we're unable to get an exclusive lock via the second FD
+ // because someone is already holding an exclusive lock.
+ ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB),
+ SyscallFailsWithErrno(EWOULDBLOCK));
+
+ // Unlock
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(FlockTest, TestMultipleHolderSharedExclusiveUpgrade) {
+ // This test will verify that we cannot obtain an exclusive lock while
+ // a shared lock is held by another descriptor, then verify that an upgrade
+ // is possible on a shared lock once all other shared locks have closed.
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_SH | LOCK_NB),
+ SyscallSucceedsWithValue(0));
+
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR));
+
+ // Verify we're unable to get an exclusive lock via the second FD because
+ // a shared lock is held.
+ ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB),
+ SyscallFailsWithErrno(EWOULDBLOCK));
+
+ // Verify that we can get a shared lock via the second descriptor instead
+ ASSERT_THAT(flock(fd.get(), LOCK_SH | LOCK_NB), SyscallSucceedsWithValue(0));
+
+ // Unlock the first and there will only be one shared lock remaining.
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+
+ // Upgrade 2nd fd.
+ ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), SyscallSucceedsWithValue(0));
+
+ // Finally unlock the second
+ ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(FlockTest, TestMultipleHolderSharedExclusiveDowngrade) {
+ // This test will verify that a shared lock is not obtainable while an
+ // exclusive lock is held but that once the first is downgraded that
+ // the second independent file descriptor can also get a shared lock.
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_NB),
+ SyscallSucceedsWithValue(0));
+
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR));
+
+ // Verify We're unable to get a shared lock via the second FD because
+ // an exclusive lock is held.
+ ASSERT_THAT(flock(fd.get(), LOCK_SH | LOCK_NB),
+ SyscallFailsWithErrno(EWOULDBLOCK));
+
+ // Verify that we can downgrade the first.
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_SH | LOCK_NB),
+ SyscallSucceedsWithValue(0));
+
+ // Now verify that we can obtain a shared lock since the first was downgraded.
+ ASSERT_THAT(flock(fd.get(), LOCK_SH | LOCK_NB), SyscallSucceedsWithValue(0));
+
+ // Finally unlock both.
+ ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
+/*
+ * flock(2): Locks created by flock() are associated with an open file table
+ * entry. This means that duplicate file descriptors (created by, for example,
+ * fork(2) or dup(2)) refer to the same lock, and this lock may be modified or
+ * released using any of these descriptors. Furthermore, the lock is released
+ * either by an explicit LOCK_UN operation on any of these duplicate descriptors
+ * or when all such descriptors have been closed.
+ */
+TEST_F(FlockTest, TestDupFdUpgrade) {
+ // This test will verify that a shared lock is upgradeable via a dupped
+ // file descriptor, if the FD wasn't dupped this would fail.
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_SH | LOCK_NB),
+ SyscallSucceedsWithValue(0));
+
+ const FileDescriptor dup_fd = ASSERT_NO_ERRNO_AND_VALUE(test_file_fd_.Dup());
+
+ // Now we should be able to upgrade via the dupped fd.
+ ASSERT_THAT(flock(dup_fd.get(), LOCK_EX | LOCK_NB),
+ SyscallSucceedsWithValue(0));
+
+ // Validate unlock via dupped fd.
+ ASSERT_THAT(flock(dup_fd.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(FlockTest, TestDupFdDowngrade) {
+ // This test will verify that a exclusive lock is downgradable via a dupped
+ // file descriptor, if the FD wasn't dupped this would fail.
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_NB),
+ SyscallSucceedsWithValue(0));
+
+ const FileDescriptor dup_fd = ASSERT_NO_ERRNO_AND_VALUE(test_file_fd_.Dup());
+
+ // Now we should be able to downgrade via the dupped fd.
+ ASSERT_THAT(flock(dup_fd.get(), LOCK_SH | LOCK_NB),
+ SyscallSucceedsWithValue(0));
+
+ // Validate unlock via dupped fd
+ ASSERT_THAT(flock(dup_fd.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(FlockTest, TestDupFdCloseRelease) {
+ // flock(2): Furthermore, the lock is released either by an explicit LOCK_UN
+ // operation on any of these duplicate descriptors, or when all such
+ // descriptors have been closed.
+ //
+ // This test will verify that a dupped fd closing will not release the
+ // underlying lock until all such dupped fds have closed.
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_NB),
+ SyscallSucceedsWithValue(0));
+
+ FileDescriptor dup_fd = ASSERT_NO_ERRNO_AND_VALUE(test_file_fd_.Dup());
+
+ // At this point we have ONE exclusive locked referenced by two different fds.
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR));
+
+ // Validate that we cannot get a lock on a new unrelated FD.
+ ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB),
+ SyscallFailsWithErrno(EWOULDBLOCK));
+
+ // Closing the dupped fd shouldn't affect the lock until all are closed.
+ dup_fd.reset(); // Closed the duped fd.
+
+ // Validate that we still cannot get a lock on a new unrelated FD.
+ ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB),
+ SyscallFailsWithErrno(EWOULDBLOCK));
+
+ // Closing the first fd
+ CloseFile(); // Will validate the syscall succeeds.
+
+ // Now we should actually be able to get a lock since all fds related to
+ // the first lock are closed.
+ ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), SyscallSucceedsWithValue(0));
+
+ // Unlock.
+ ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(FlockTest, TestDupFdUnlockRelease) {
+ /* flock(2): Furthermore, the lock is released either by an explicit LOCK_UN
+ * operation on any of these duplicate descriptors, or when all such
+ * descriptors have been closed.
+ */
+ // This test will verify that an explict unlock on a dupped FD will release
+ // the underlying lock unlike the previous case where close on a dup was
+ // not enough to release the lock.
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_NB),
+ SyscallSucceedsWithValue(0));
+
+ const FileDescriptor dup_fd = ASSERT_NO_ERRNO_AND_VALUE(test_file_fd_.Dup());
+
+ // At this point we have ONE exclusive locked referenced by two different fds.
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR));
+
+ // Validate that we cannot get a lock on a new unrelated FD.
+ ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB),
+ SyscallFailsWithErrno(EWOULDBLOCK));
+
+ // Explicitly unlock via the dupped descriptor.
+ ASSERT_THAT(flock(dup_fd.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+
+ // Validate that we can now get the lock since we explicitly unlocked.
+ ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), SyscallSucceedsWithValue(0));
+
+ // Unlock
+ ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(FlockTest, TestDupFdFollowedByLock) {
+ // This test will verify that taking a lock on a file descriptor that has
+ // already been dupped means that the lock is shared between both. This is
+ // slightly different than than duping on an already locked FD.
+ FileDescriptor dup_fd = ASSERT_NO_ERRNO_AND_VALUE(test_file_fd_.Dup());
+
+ // Take a lock.
+ ASSERT_THAT(flock(dup_fd.get(), LOCK_EX | LOCK_NB), SyscallSucceeds());
+
+ // Now dup_fd and test_file_ should both reference the same lock.
+ // We shouldn't be able to obtain a lock until both are closed.
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR));
+
+ // Closing the first fd
+ dup_fd.reset(); // Close the duped fd.
+
+ // Validate that we cannot get a lock yet because the dupped descriptor.
+ ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB),
+ SyscallFailsWithErrno(EWOULDBLOCK));
+
+ // Closing the second fd.
+ CloseFile(); // CloseFile() will validate the syscall succeeds.
+
+ // Now we should be able to get the lock.
+ ASSERT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), SyscallSucceeds());
+
+ // Unlock.
+ ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceedsWithValue(0));
+}
+
+// NOTE: These blocking tests are not perfect. Unfortunantely it's very hard to
+// determine if a thread was actually blocked in the kernel so we're forced
+// to use timing.
+TEST_F(FlockTest, BlockingLockNoBlockingForSharedLocks) {
+ // This test will verify that although LOCK_NB isn't specified
+ // two different fds can obtain shared locks without blocking.
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_SH), SyscallSucceeds());
+
+ // kHoldLockTime is the amount of time we will hold the lock before releasing.
+ constexpr absl::Duration kHoldLockTime = absl::Seconds(30);
+
+ const DisableSave ds; // Timing-related.
+
+ // We do this in another thread so we can determine if it was actually
+ // blocked by timing the amount of time it took for the syscall to complete.
+ ScopedThread t([&] {
+ MonotonicTimer timer;
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR));
+
+ // Only a single shared lock is held, the lock will be granted immediately.
+ // This should be granted without any blocking. Don't save here to avoid
+ // wild discrepencies on timing.
+ timer.Start();
+ ASSERT_THAT(flock(fd.get(), LOCK_SH), SyscallSucceeds());
+
+ // We held the lock for 30 seconds but this thread should not have
+ // blocked at all so we expect a very small duration on syscall completion.
+ ASSERT_LT(timer.Duration(),
+ absl::Seconds(1)); // 1000ms is much less than 30s.
+
+ // We can release our second shared lock
+ ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceeds());
+ });
+
+ // Sleep before unlocking.
+ absl::SleepFor(kHoldLockTime);
+
+ // Release the first shared lock. Don't save in this situation to avoid
+ // discrepencies in timing.
+ EXPECT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceeds());
+}
+
+TEST_F(FlockTest, BlockingLockFirstSharedSecondExclusive) {
+ // This test will verify that if someone holds a shared lock any attempt to
+ // obtain an exclusive lock will result in blocking.
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_SH), SyscallSucceeds());
+
+ // kHoldLockTime is the amount of time we will hold the lock before releasing.
+ constexpr absl::Duration kHoldLockTime = absl::Seconds(2);
+
+ const DisableSave ds; // Timing-related.
+
+ // We do this in another thread so we can determine if it was actually
+ // blocked by timing the amount of time it took for the syscall to complete.
+ ScopedThread t([&] {
+ MonotonicTimer timer;
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR));
+
+ // This exclusive lock should block because someone is already holding a
+ // shared lock. We don't save here to avoid wild discrepencies on timing.
+ timer.Start();
+ ASSERT_THAT(RetryEINTR(flock)(fd.get(), LOCK_EX), SyscallSucceeds());
+
+ // We should be blocked, we will expect to be blocked for more than 1.0s.
+ ASSERT_GT(timer.Duration(), absl::Seconds(1));
+
+ // We can release our exclusive lock.
+ ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceeds());
+ });
+
+ // Sleep before unlocking.
+ absl::SleepFor(kHoldLockTime);
+
+ // Release the shared lock allowing the thread to proceed.
+ // We don't save here to avoid wild discrepencies in timing.
+ EXPECT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceeds());
+}
+
+TEST_F(FlockTest, BlockingLockFirstExclusiveSecondShared) {
+ // This test will verify that if someone holds an exclusive lock any attempt
+ // to obtain a shared lock will result in blocking.
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX), SyscallSucceeds());
+
+ // kHoldLockTime is the amount of time we will hold the lock before releasing.
+ constexpr absl::Duration kHoldLockTime = absl::Seconds(2);
+
+ const DisableSave ds; // Timing-related.
+
+ // We do this in another thread so we can determine if it was actually
+ // blocked by timing the amount of time it took for the syscall to complete.
+ ScopedThread t([&] {
+ MonotonicTimer timer;
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR));
+
+ // This shared lock should block because someone is already holding an
+ // exclusive lock. We don't save here to avoid wild discrepencies on timing.
+ timer.Start();
+ ASSERT_THAT(RetryEINTR(flock)(fd.get(), LOCK_SH), SyscallSucceeds());
+
+ // We should be blocked, we will expect to be blocked for more than 1.0s.
+ ASSERT_GT(timer.Duration(), absl::Seconds(1));
+
+ // We can release our shared lock.
+ ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceeds());
+ });
+
+ // Sleep before unlocking.
+ absl::SleepFor(kHoldLockTime);
+
+ // Release the exclusive lock allowing the blocked thread to proceed.
+ // We don't save here to avoid wild discrepencies in timing.
+ EXPECT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceeds());
+}
+
+TEST_F(FlockTest, BlockingLockFirstExclusiveSecondExclusive) {
+ // This test will verify that if someone holds an exclusive lock any attempt
+ // to obtain another exclusive lock will result in blocking.
+ ASSERT_THAT(flock(test_file_fd_.get(), LOCK_EX), SyscallSucceeds());
+
+ // kHoldLockTime is the amount of time we will hold the lock before releasing.
+ constexpr absl::Duration kHoldLockTime = absl::Seconds(2);
+
+ const DisableSave ds; // Timing-related.
+
+ // We do this in another thread so we can determine if it was actually
+ // blocked by timing the amount of time it took for the syscall to complete.
+ ScopedThread t([&] {
+ MonotonicTimer timer;
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR));
+
+ // This exclusive lock should block because someone is already holding an
+ // exclusive lock.
+ timer.Start();
+ ASSERT_THAT(RetryEINTR(flock)(fd.get(), LOCK_EX), SyscallSucceeds());
+
+ // We should be blocked, we will expect to be blocked for more than 1.0s.
+ ASSERT_GT(timer.Duration(), absl::Seconds(1));
+
+ // We can release our exclusive lock.
+ ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceeds());
+ });
+
+ // Sleep before unlocking.
+ absl::SleepFor(kHoldLockTime);
+
+ // Release the exclusive lock allowing the blocked thread to proceed.
+ // We don't save to avoid wild discrepencies in timing.
+ EXPECT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceeds());
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/fork.cc b/test/syscalls/linux/fork.cc
new file mode 100644
index 000000000..1bff5e50f
--- /dev/null
+++ b/test/syscalls/linux/fork.cc
@@ -0,0 +1,413 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <atomic>
+
+#include "gtest/gtest.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/logging.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+using ::testing::Ge;
+
+class ForkTest : public ::testing::Test {
+ protected:
+ // SetUp creates a populated, open file.
+ void SetUp() override {
+ // Make a shared mapping.
+ shared_ = reinterpret_cast<char*>(mmap(0, kPageSize, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0));
+ ASSERT_NE(reinterpret_cast<void*>(shared_), MAP_FAILED);
+
+ // Make a private mapping.
+ private_ =
+ reinterpret_cast<char*>(mmap(0, kPageSize, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
+ ASSERT_NE(reinterpret_cast<void*>(private_), MAP_FAILED);
+
+ // Make a pipe.
+ ASSERT_THAT(pipe(pipes_), SyscallSucceeds());
+ }
+
+ // TearDown frees associated resources.
+ void TearDown() override {
+ EXPECT_THAT(munmap(shared_, kPageSize), SyscallSucceeds());
+ EXPECT_THAT(munmap(private_, kPageSize), SyscallSucceeds());
+ EXPECT_THAT(close(pipes_[0]), SyscallSucceeds());
+ EXPECT_THAT(close(pipes_[1]), SyscallSucceeds());
+ }
+
+ // Fork executes a clone system call.
+ pid_t Fork() {
+ pid_t pid = fork();
+ MaybeSave();
+ TEST_PCHECK_MSG(pid >= 0, "fork failed");
+ return pid;
+ }
+
+ // Wait waits for the given pid and returns the exit status. If the child was
+ // killed by a signal or an error occurs, then 256+signal is returned.
+ int Wait(pid_t pid) {
+ int status;
+ while (true) {
+ int rval = wait4(pid, &status, 0, NULL);
+ if (rval < 0) {
+ return rval;
+ }
+ if (rval != pid) {
+ continue;
+ }
+ if (WIFEXITED(status)) {
+ return WEXITSTATUS(status);
+ }
+ if (WIFSIGNALED(status)) {
+ return 256 + WTERMSIG(status);
+ }
+ }
+ }
+
+ // Exit exits the proccess.
+ void Exit(int code) {
+ _exit(code);
+
+ // Should never reach here. Since the exit above failed, we really don't
+ // have much in the way of options to indicate failure. So we just try to
+ // log an assertion failure to the logs. The parent process will likely
+ // fail anyways if exit is not working.
+ TEST_CHECK_MSG(false, "_exit returned");
+ }
+
+ // ReadByte reads a byte from the shared pipe.
+ char ReadByte() {
+ char val = -1;
+ TEST_PCHECK(ReadFd(pipes_[0], &val, 1) == 1);
+ MaybeSave();
+ return val;
+ }
+
+ // WriteByte writes a byte from the shared pipe.
+ void WriteByte(char val) {
+ TEST_PCHECK(WriteFd(pipes_[1], &val, 1) == 1);
+ MaybeSave();
+ }
+
+ // Shared pipe.
+ int pipes_[2];
+
+ // Shared mapping (one page).
+ char* shared_;
+
+ // Private mapping (one page).
+ char* private_;
+};
+
+TEST_F(ForkTest, Simple) {
+ pid_t child = Fork();
+ if (child == 0) {
+ Exit(0);
+ }
+ EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(ForkTest, ExitCode) {
+ pid_t child = Fork();
+ if (child == 0) {
+ Exit(123);
+ }
+ EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(123));
+ child = Fork();
+ if (child == 0) {
+ Exit(1);
+ }
+ EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(1));
+}
+
+TEST_F(ForkTest, Multi) {
+ pid_t child1 = Fork();
+ if (child1 == 0) {
+ Exit(0);
+ }
+ pid_t child2 = Fork();
+ if (child2 == 0) {
+ Exit(1);
+ }
+ EXPECT_THAT(Wait(child1), SyscallSucceedsWithValue(0));
+ EXPECT_THAT(Wait(child2), SyscallSucceedsWithValue(1));
+}
+
+TEST_F(ForkTest, Pipe) {
+ pid_t child = Fork();
+ if (child == 0) {
+ WriteByte(1);
+ Exit(0);
+ }
+ EXPECT_EQ(ReadByte(), 1);
+ EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(ForkTest, SharedMapping) {
+ pid_t child = Fork();
+ if (child == 0) {
+ // Wait for the parent.
+ ReadByte();
+ if (shared_[0] == 1) {
+ Exit(0);
+ }
+ // Failed.
+ Exit(1);
+ }
+ // Change the mapping.
+ ASSERT_EQ(shared_[0], 0);
+ shared_[0] = 1;
+ // Unblock the child.
+ WriteByte(0);
+ // Did it work?
+ EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(ForkTest, PrivateMapping) {
+ pid_t child = Fork();
+ if (child == 0) {
+ // Wait for the parent.
+ ReadByte();
+ if (private_[0] == 0) {
+ Exit(0);
+ }
+ // Failed.
+ Exit(1);
+ }
+ // Change the mapping.
+ ASSERT_EQ(private_[0], 0);
+ private_[0] = 1;
+ // Unblock the child.
+ WriteByte(0);
+ // Did it work?
+ EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0));
+}
+
+// Test that cpuid works after a fork.
+TEST_F(ForkTest, Cpuid) {
+ pid_t child = Fork();
+
+ // We should be able to determine the CPU vendor.
+ ASSERT_NE(GetCPUVendor(), CPUVendor::kUnknownVendor);
+
+ if (child == 0) {
+ Exit(0);
+ }
+ EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(ForkTest, Mmap) {
+ pid_t child = Fork();
+
+ if (child == 0) {
+ void* addr =
+ mmap(0, kPageSize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ MaybeSave();
+ Exit(addr == MAP_FAILED);
+ }
+
+ EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0));
+}
+
+static volatile int alarmed = 0;
+
+void AlarmHandler(int sig, siginfo_t* info, void* context) { alarmed = 1; }
+
+TEST_F(ForkTest, Alarm) {
+ // Setup an alarm handler.
+ struct sigaction sa;
+ sa.sa_sigaction = AlarmHandler;
+ sigfillset(&sa.sa_mask);
+ sa.sa_flags = SA_SIGINFO;
+ EXPECT_THAT(sigaction(SIGALRM, &sa, nullptr), SyscallSucceeds());
+
+ pid_t child = Fork();
+
+ if (child == 0) {
+ alarm(1);
+ sleep(3);
+ if (!alarmed) {
+ Exit(1);
+ }
+ Exit(0);
+ }
+
+ EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0));
+ EXPECT_EQ(0, alarmed);
+}
+
+// Child cannot affect parent private memory.
+TEST_F(ForkTest, PrivateMemory) {
+ std::atomic<uint32_t> local(0);
+
+ pid_t child1 = Fork();
+ if (child1 == 0) {
+ local++;
+
+ pid_t child2 = Fork();
+ if (child2 == 0) {
+ local++;
+
+ TEST_CHECK(local.load() == 2);
+
+ Exit(0);
+ }
+
+ TEST_PCHECK(Wait(child2) == 0);
+ TEST_CHECK(local.load() == 1);
+ Exit(0);
+ }
+
+ EXPECT_THAT(Wait(child1), SyscallSucceedsWithValue(0));
+ EXPECT_EQ(0, local.load());
+}
+
+// Kernel-accessed buffers should remain coherent across COW.
+TEST_F(ForkTest, COWSegment) {
+ constexpr int kBufSize = 1024;
+ char* read_buf = private_;
+ char* touch = private_ + kPageSize / 2;
+
+ std::string contents(kBufSize, 'a');
+
+ ScopedThread t([&] {
+ // Wait to be sure the parent is blocked in read.
+ absl::SleepFor(absl::Seconds(3));
+
+ // Fork to mark private pages for COW.
+ //
+ // Use fork directly rather than the Fork wrapper to skip the multi-threaded
+ // check, and limit the child to async-signal-safe functions:
+ //
+ // "After a fork() in a multithreaded program, the child can safely call
+ // only async-signal-safe functions (see signal(7)) until such time as it
+ // calls execve(2)."
+ //
+ // Skip ASSERT in the child, as it isn't async-signal-safe.
+ pid_t child = fork();
+ if (child == 0) {
+ // Wait to be sure parent touched memory.
+ sleep(3);
+ Exit(0);
+ }
+
+ // Check success only in the parent.
+ ASSERT_THAT(child, SyscallSucceedsWithValue(Ge(0)));
+
+ // Trigger COW on private page.
+ *touch = 42;
+
+ // Write to pipe. Parent should still be able to read this.
+ EXPECT_THAT(WriteFd(pipes_[1], contents.c_str(), kBufSize),
+ SyscallSucceedsWithValue(kBufSize));
+
+ EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0));
+ });
+
+ EXPECT_THAT(ReadFd(pipes_[0], read_buf, kBufSize),
+ SyscallSucceedsWithValue(kBufSize));
+ EXPECT_STREQ(contents.c_str(), read_buf);
+}
+
+TEST_F(ForkTest, SigAltStack) {
+ std::vector<char> stack_mem(SIGSTKSZ);
+ stack_t stack = {};
+ stack.ss_size = SIGSTKSZ;
+ stack.ss_sp = stack_mem.data();
+ ASSERT_THAT(sigaltstack(&stack, nullptr), SyscallSucceeds());
+
+ pid_t child = Fork();
+
+ if (child == 0) {
+ stack_t oss = {};
+ TEST_PCHECK(sigaltstack(nullptr, &oss) == 0);
+ MaybeSave();
+
+ TEST_CHECK((oss.ss_flags & SS_DISABLE) == 0);
+ TEST_CHECK(oss.ss_size == SIGSTKSZ);
+ TEST_CHECK(oss.ss_sp == stack.ss_sp);
+
+ Exit(0);
+ }
+ EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(ForkTest, Affinity) {
+ // Make a non-default cpumask.
+ cpu_set_t parent_mask;
+ EXPECT_THAT(sched_getaffinity(/*pid=*/0, sizeof(cpu_set_t), &parent_mask),
+ SyscallSucceeds());
+ // Knock out the lowest bit.
+ for (unsigned int n = 0; n < CPU_SETSIZE; n++) {
+ if (CPU_ISSET(n, &parent_mask)) {
+ CPU_CLR(n, &parent_mask);
+ break;
+ }
+ }
+ EXPECT_THAT(sched_setaffinity(/*pid=*/0, sizeof(cpu_set_t), &parent_mask),
+ SyscallSucceeds());
+
+ pid_t child = Fork();
+ if (child == 0) {
+ cpu_set_t child_mask;
+
+ int ret = sched_getaffinity(/*pid=*/0, sizeof(cpu_set_t), &child_mask);
+ MaybeSave();
+ if (ret < 0) {
+ Exit(-ret);
+ }
+
+ TEST_CHECK(CPU_EQUAL(&child_mask, &parent_mask));
+
+ Exit(0);
+ }
+
+ EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0));
+}
+
+#ifdef __x86_64__
+// Clone with CLONE_SETTLS and a non-canonical TLS address is rejected.
+TEST(CloneTest, NonCanonicalTLS) {
+ constexpr uintptr_t kNonCanonical = 1ull << 48;
+
+ // We need a valid address for the stack pointer. We'll never actually execute
+ // on this.
+ char stack;
+
+ EXPECT_THAT(syscall(__NR_clone, SIGCHLD | CLONE_SETTLS, &stack, nullptr,
+ nullptr, kNonCanonical),
+ SyscallFailsWithErrno(EPERM));
+}
+#endif
+
+} // namespace
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/fpsig_fork.cc b/test/syscalls/linux/fpsig_fork.cc
new file mode 100644
index 000000000..e8f1dfa8a
--- /dev/null
+++ b/test/syscalls/linux/fpsig_fork.cc
@@ -0,0 +1,105 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This test verifies that fork(2) in a signal handler will correctly
+// restore floating point state after the signal handler returns in both
+// the child and parent.
+#include <sys/time.h>
+
+#include "gtest/gtest.h"
+#include "test/util/logging.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+#define GET_XMM(__var, __xmm) \
+ asm volatile("movq %%" #__xmm ", %0" : "=r"(__var))
+#define SET_XMM(__var, __xmm) asm volatile("movq %0, %%" #__xmm : : "r"(__var))
+
+int parent, child;
+
+void sigusr1(int s, siginfo_t* siginfo, void* _uc) {
+ // Fork and clobber %xmm0. The fpstate should be restored by sigreturn(2)
+ // in both parent and child.
+ child = fork();
+ TEST_CHECK_MSG(child >= 0, "fork failed");
+
+ uint64_t val = SIGUSR1;
+ SET_XMM(val, xmm0);
+}
+
+TEST(FPSigTest, Fork) {
+ parent = getpid();
+ pid_t parent_tid = gettid();
+
+ struct sigaction sa = {};
+ sigemptyset(&sa.sa_mask);
+ sa.sa_flags = SA_SIGINFO;
+ sa.sa_sigaction = sigusr1;
+ ASSERT_THAT(sigaction(SIGUSR1, &sa, nullptr), SyscallSucceeds());
+
+ // The amd64 ABI specifies that the XMM register set is caller-saved. This
+ // implies that if there is any function call between SET_XMM and GET_XMM the
+ // compiler might save/restore xmm0 implicitly. This defeats the entire
+ // purpose of the test which is to verify that fpstate is restored by
+ // sigreturn(2).
+ //
+ // This is the reason why 'tgkill(getpid(), gettid(), SIGUSR1)' is implemented
+ // in inline assembly below.
+ //
+ // If the OS is broken and registers are clobbered by the child, using tgkill
+ // to signal the current thread increases the likelihood that this thread will
+ // be the one clobbered.
+
+ uint64_t expected = 0xdeadbeeffacefeed;
+ SET_XMM(expected, xmm0);
+
+ asm volatile(
+ "movl %[killnr], %%eax;"
+ "movl %[parent], %%edi;"
+ "movl %[tid], %%esi;"
+ "movl %[sig], %%edx;"
+ "syscall;"
+ :
+ : [killnr] "i"(__NR_tgkill), [parent] "rm"(parent),
+ [tid] "rm"(parent_tid), [sig] "i"(SIGUSR1)
+ : "rax", "rdi", "rsi", "rdx",
+ // Clobbered by syscall.
+ "rcx", "r11");
+
+ uint64_t got;
+ GET_XMM(got, xmm0);
+
+ if (getpid() == parent) { // Parent.
+ int status;
+ ASSERT_THAT(waitpid(child, &status, 0), SyscallSucceedsWithValue(child));
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0);
+ }
+
+ // TEST_CHECK_MSG since this may run in the child.
+ TEST_CHECK_MSG(expected == got, "Bad xmm0 value");
+
+ if (getpid() != parent) { // Child.
+ _exit(0);
+ }
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/fpsig_nested.cc b/test/syscalls/linux/fpsig_nested.cc
new file mode 100644
index 000000000..2fa40b42d
--- /dev/null
+++ b/test/syscalls/linux/fpsig_nested.cc
@@ -0,0 +1,134 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This program verifies that application floating point state is restored
+// correctly after a signal handler returns. It also verifies that this works
+// with nested signals.
+#include <sys/time.h>
+
+#include "gtest/gtest.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+#define GET_XMM(__var, __xmm) \
+ asm volatile("movq %%" #__xmm ", %0" : "=r"(__var))
+#define SET_XMM(__var, __xmm) asm volatile("movq %0, %%" #__xmm : : "r"(__var))
+
+int pid;
+int tid;
+
+volatile uint64_t entryxmm[2] = {~0UL, ~0UL};
+volatile uint64_t exitxmm[2];
+
+void sigusr2(int s, siginfo_t* siginfo, void* _uc) {
+ uint64_t val = SIGUSR2;
+
+ // Record the value of %xmm0 on entry and then clobber it.
+ GET_XMM(entryxmm[1], xmm0);
+ SET_XMM(val, xmm0);
+ GET_XMM(exitxmm[1], xmm0);
+}
+
+void sigusr1(int s, siginfo_t* siginfo, void* _uc) {
+ uint64_t val = SIGUSR1;
+
+ // Record the value of %xmm0 on entry and then clobber it.
+ GET_XMM(entryxmm[0], xmm0);
+ SET_XMM(val, xmm0);
+
+ // Send a SIGUSR2 to ourself. The signal mask is configured such that
+ // the SIGUSR2 handler will run before this handler returns.
+ asm volatile(
+ "movl %[killnr], %%eax;"
+ "movl %[pid], %%edi;"
+ "movl %[tid], %%esi;"
+ "movl %[sig], %%edx;"
+ "syscall;"
+ :
+ : [killnr] "i"(__NR_tgkill), [pid] "rm"(pid), [tid] "rm"(tid),
+ [sig] "i"(SIGUSR2)
+ : "rax", "rdi", "rsi", "rdx",
+ // Clobbered by syscall.
+ "rcx", "r11");
+
+ // Record value of %xmm0 again to verify that the nested signal handler
+ // does not clobber it.
+ GET_XMM(exitxmm[0], xmm0);
+}
+
+TEST(FPSigTest, NestedSignals) {
+ pid = getpid();
+ tid = gettid();
+
+ struct sigaction sa = {};
+ sigemptyset(&sa.sa_mask);
+ sa.sa_flags = SA_SIGINFO;
+ sa.sa_sigaction = sigusr1;
+ ASSERT_THAT(sigaction(SIGUSR1, &sa, nullptr), SyscallSucceeds());
+
+ sa.sa_sigaction = sigusr2;
+ ASSERT_THAT(sigaction(SIGUSR2, &sa, nullptr), SyscallSucceeds());
+
+ // The amd64 ABI specifies that the XMM register set is caller-saved. This
+ // implies that if there is any function call between SET_XMM and GET_XMM the
+ // compiler might save/restore xmm0 implicitly. This defeats the entire
+ // purpose of the test which is to verify that fpstate is restored by
+ // sigreturn(2).
+ //
+ // This is the reason why 'tgkill(getpid(), gettid(), SIGUSR1)' is implemented
+ // in inline assembly below.
+ //
+ // If the OS is broken and registers are clobbered by the signal, using tgkill
+ // to signal the current thread ensures that this is the clobbered thread.
+
+ uint64_t expected = 0xdeadbeeffacefeed;
+ SET_XMM(expected, xmm0);
+
+ asm volatile(
+ "movl %[killnr], %%eax;"
+ "movl %[pid], %%edi;"
+ "movl %[tid], %%esi;"
+ "movl %[sig], %%edx;"
+ "syscall;"
+ :
+ : [killnr] "i"(__NR_tgkill), [pid] "rm"(pid), [tid] "rm"(tid),
+ [sig] "i"(SIGUSR1)
+ : "rax", "rdi", "rsi", "rdx",
+ // Clobbered by syscall.
+ "rcx", "r11");
+
+ uint64_t got;
+ GET_XMM(got, xmm0);
+
+ //
+ // The checks below verifies the following:
+ // - signal handlers must called with a clean fpu state.
+ // - sigreturn(2) must restore fpstate of the interrupted context.
+ //
+ EXPECT_EQ(expected, got);
+ EXPECT_EQ(entryxmm[0], 0);
+ EXPECT_EQ(entryxmm[1], 0);
+ EXPECT_EQ(exitxmm[0], SIGUSR1);
+ EXPECT_EQ(exitxmm[1], SIGUSR2);
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/fsync.cc b/test/syscalls/linux/fsync.cc
new file mode 100644
index 000000000..536a73bf1
--- /dev/null
+++ b/test/syscalls/linux/fsync.cc
@@ -0,0 +1,55 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <string>
+
+#include "gtest/gtest.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(FsyncTest, TempFileSucceeds) {
+ std::string path = NewTempAbsPath();
+ int fd;
+ EXPECT_THAT(fd = open(path.c_str(), O_RDWR | O_CREAT, 0666),
+ SyscallSucceeds());
+ const std::string data = "some data to sync";
+ EXPECT_THAT(write(fd, data.c_str(), data.size()),
+ SyscallSucceedsWithValue(data.size()));
+ EXPECT_THAT(fsync(fd), SyscallSucceeds());
+ ASSERT_THAT(close(fd), SyscallSucceeds());
+ ASSERT_THAT(unlink(path.c_str()), SyscallSucceeds());
+}
+
+TEST(FsyncTest, CannotFsyncOnUnopenedFd) {
+ int fd;
+ auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ ASSERT_THAT(fd = open(f.path().c_str(), O_RDONLY), SyscallSucceeds());
+ ASSERT_THAT(close(fd), SyscallSucceeds());
+
+ // fd is now invalid.
+ EXPECT_THAT(fsync(fd), SyscallFailsWithErrno(EBADF));
+}
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/futex.cc b/test/syscalls/linux/futex.cc
new file mode 100644
index 000000000..6fa284013
--- /dev/null
+++ b/test/syscalls/linux/futex.cc
@@ -0,0 +1,595 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <linux/futex.h>
+#include <linux/types.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <atomic>
+#include <memory>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "absl/memory/memory.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/cleanup.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/memory_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+#include "test/util/timer_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Amount of time we wait for threads doing futex_wait to start running before
+// doing futex_wake.
+constexpr auto kWaiterStartupDelay = absl::Seconds(3);
+
+// Default timeout for waiters in tests where we expect a futex_wake to be
+// ineffective.
+constexpr auto kIneffectiveWakeTimeout = absl::Seconds(6);
+
+static_assert(kWaiterStartupDelay < kIneffectiveWakeTimeout,
+ "futex_wait will time out before futex_wake is called");
+
+int futex_wait(bool priv, std::atomic<int>* uaddr, int val,
+ absl::Duration timeout = absl::InfiniteDuration()) {
+ int op = FUTEX_WAIT;
+ if (priv) {
+ op |= FUTEX_PRIVATE_FLAG;
+ }
+
+ if (timeout == absl::InfiniteDuration()) {
+ return RetryEINTR(syscall)(SYS_futex, uaddr, op, val, nullptr);
+ }
+
+ // FUTEX_WAIT doesn't adjust the timeout if it returns EINTR, so we have to do
+ // so.
+ while (true) {
+ auto const timeout_ts = absl::ToTimespec(timeout);
+ MonotonicTimer timer;
+ timer.Start();
+ int const ret = syscall(SYS_futex, uaddr, op, val, &timeout_ts);
+ if (ret != -1 || errno != EINTR) {
+ return ret;
+ }
+ timeout = std::max(timeout - timer.Duration(), absl::ZeroDuration());
+ }
+}
+
+int futex_wait_bitset(bool priv, std::atomic<int>* uaddr, int val, int bitset,
+ absl::Time deadline = absl::InfiniteFuture()) {
+ int op = FUTEX_WAIT_BITSET | FUTEX_CLOCK_REALTIME;
+ if (priv) {
+ op |= FUTEX_PRIVATE_FLAG;
+ }
+
+ auto const deadline_ts = absl::ToTimespec(deadline);
+ return RetryEINTR(syscall)(
+ SYS_futex, uaddr, op, val,
+ deadline == absl::InfiniteFuture() ? nullptr : &deadline_ts, nullptr,
+ bitset);
+}
+
+int futex_wake(bool priv, std::atomic<int>* uaddr, int count) {
+ int op = FUTEX_WAKE;
+ if (priv) {
+ op |= FUTEX_PRIVATE_FLAG;
+ }
+ return syscall(SYS_futex, uaddr, op, count);
+}
+
+int futex_wake_bitset(bool priv, std::atomic<int>* uaddr, int count,
+ int bitset) {
+ int op = FUTEX_WAKE_BITSET;
+ if (priv) {
+ op |= FUTEX_PRIVATE_FLAG;
+ }
+ return syscall(SYS_futex, uaddr, op, count, nullptr, nullptr, bitset);
+}
+
+int futex_wake_op(bool priv, std::atomic<int>* uaddr1, std::atomic<int>* uaddr2,
+ int nwake1, int nwake2, uint32_t sub_op) {
+ int op = FUTEX_WAKE_OP;
+ if (priv) {
+ op |= FUTEX_PRIVATE_FLAG;
+ }
+ return syscall(SYS_futex, uaddr1, op, nwake1, nwake2, uaddr2, sub_op);
+}
+
+// Fixture for futex tests parameterized by whether to use private or shared
+// futexes.
+class PrivateAndSharedFutexTest : public ::testing::TestWithParam<bool> {
+ protected:
+ bool IsPrivate() const { return GetParam(); }
+ int PrivateFlag() const { return IsPrivate() ? FUTEX_PRIVATE_FLAG : 0; }
+};
+
+// FUTEX_WAIT with 0 timeout does not block.
+TEST_P(PrivateAndSharedFutexTest, Wait_ZeroTimeout) {
+ struct timespec timeout = {};
+
+ // Don't use the futex_wait helper because it adjusts timeout.
+ int a = 1;
+ EXPECT_THAT(syscall(SYS_futex, &a, FUTEX_WAIT | PrivateFlag(), a, &timeout),
+ SyscallFailsWithErrno(ETIMEDOUT));
+}
+
+TEST_P(PrivateAndSharedFutexTest, Wait_Timeout) {
+ std::atomic<int> a = ATOMIC_VAR_INIT(1);
+
+ MonotonicTimer timer;
+ timer.Start();
+ constexpr absl::Duration kTimeout = absl::Seconds(1);
+ EXPECT_THAT(futex_wait(IsPrivate(), &a, a, kTimeout),
+ SyscallFailsWithErrno(ETIMEDOUT));
+ EXPECT_GE(timer.Duration(), kTimeout);
+}
+
+TEST_P(PrivateAndSharedFutexTest, Wait_BitsetTimeout) {
+ std::atomic<int> a = ATOMIC_VAR_INIT(1);
+
+ MonotonicTimer timer;
+ timer.Start();
+ constexpr absl::Duration kTimeout = absl::Seconds(1);
+ EXPECT_THAT(
+ futex_wait_bitset(IsPrivate(), &a, a, 0xffffffff, absl::Now() + kTimeout),
+ SyscallFailsWithErrno(ETIMEDOUT));
+ EXPECT_GE(timer.Duration(), kTimeout);
+}
+
+TEST_P(PrivateAndSharedFutexTest, WaitBitset_NegativeTimeout) {
+ std::atomic<int> a = ATOMIC_VAR_INIT(1);
+
+ MonotonicTimer timer;
+ timer.Start();
+ EXPECT_THAT(futex_wait_bitset(IsPrivate(), &a, a, 0xffffffff,
+ absl::Now() - absl::Seconds(1)),
+ SyscallFailsWithErrno(ETIMEDOUT));
+}
+
+TEST_P(PrivateAndSharedFutexTest, Wait_WrongVal) {
+ std::atomic<int> a = ATOMIC_VAR_INIT(1);
+ EXPECT_THAT(futex_wait(IsPrivate(), &a, a + 1),
+ SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST_P(PrivateAndSharedFutexTest, Wait_ZeroBitset) {
+ std::atomic<int> a = ATOMIC_VAR_INIT(1);
+ EXPECT_THAT(futex_wait_bitset(IsPrivate(), &a, a, 0),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_P(PrivateAndSharedFutexTest, Wake1_NoRandomSave) {
+ constexpr int kInitialValue = 1;
+ std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue);
+
+ // Prevent save/restore from interrupting futex_wait, which will cause it to
+ // return EAGAIN instead of the expected result if futex_wait is restarted
+ // after we change the value of a below.
+ DisableSave ds;
+ ScopedThread thread([&] {
+ EXPECT_THAT(futex_wait(IsPrivate(), &a, kInitialValue),
+ SyscallSucceedsWithValue(0));
+ });
+ absl::SleepFor(kWaiterStartupDelay);
+
+ // Change a so that if futex_wake happens before futex_wait, the latter
+ // returns EAGAIN instead of hanging the test.
+ a.fetch_add(1);
+ EXPECT_THAT(futex_wake(IsPrivate(), &a, 1), SyscallSucceedsWithValue(1));
+}
+
+TEST_P(PrivateAndSharedFutexTest, WakeAll_NoRandomSave) {
+ constexpr int kInitialValue = 1;
+ std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue);
+
+ DisableSave ds;
+ constexpr int kThreads = 5;
+ std::vector<std::unique_ptr<ScopedThread>> threads;
+ threads.reserve(kThreads);
+ for (int i = 0; i < kThreads; i++) {
+ threads.push_back(absl::make_unique<ScopedThread>([&] {
+ EXPECT_THAT(futex_wait(IsPrivate(), &a, kInitialValue),
+ SyscallSucceeds());
+ }));
+ }
+ absl::SleepFor(kWaiterStartupDelay);
+
+ a.fetch_add(1);
+ EXPECT_THAT(futex_wake(IsPrivate(), &a, kThreads),
+ SyscallSucceedsWithValue(kThreads));
+}
+
+TEST_P(PrivateAndSharedFutexTest, WakeSome_NoRandomSave) {
+ constexpr int kInitialValue = 1;
+ std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue);
+
+ DisableSave ds;
+ constexpr int kThreads = 5;
+ constexpr int kWokenThreads = 3;
+ static_assert(kWokenThreads < kThreads,
+ "can't wake more threads than are created");
+ std::vector<std::unique_ptr<ScopedThread>> threads;
+ threads.reserve(kThreads);
+ std::vector<int> rets;
+ rets.reserve(kThreads);
+ std::vector<int> errs;
+ errs.reserve(kThreads);
+ for (int i = 0; i < kThreads; i++) {
+ rets.push_back(-1);
+ errs.push_back(0);
+ }
+ for (int i = 0; i < kThreads; i++) {
+ threads.push_back(absl::make_unique<ScopedThread>([&, i] {
+ rets[i] =
+ futex_wait(IsPrivate(), &a, kInitialValue, kIneffectiveWakeTimeout);
+ errs[i] = errno;
+ }));
+ }
+ absl::SleepFor(kWaiterStartupDelay);
+
+ a.fetch_add(1);
+ EXPECT_THAT(futex_wake(IsPrivate(), &a, kWokenThreads),
+ SyscallSucceedsWithValue(kWokenThreads));
+
+ int woken = 0;
+ int timedout = 0;
+ for (int i = 0; i < kThreads; i++) {
+ threads[i]->Join();
+ if (rets[i] == 0) {
+ woken++;
+ } else if (errs[i] == ETIMEDOUT) {
+ timedout++;
+ } else {
+ ADD_FAILURE() << " thread " << i << ": returned " << rets[i] << ", errno "
+ << errs[i];
+ }
+ }
+ EXPECT_EQ(woken, kWokenThreads);
+ EXPECT_EQ(timedout, kThreads - kWokenThreads);
+}
+
+TEST_P(PrivateAndSharedFutexTest, WaitBitset_Wake_NoRandomSave) {
+ constexpr int kInitialValue = 1;
+ std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue);
+
+ DisableSave ds;
+ ScopedThread thread([&] {
+ EXPECT_THAT(futex_wait_bitset(IsPrivate(), &a, kInitialValue, 0b01001000),
+ SyscallSucceeds());
+ });
+ absl::SleepFor(kWaiterStartupDelay);
+
+ a.fetch_add(1);
+ EXPECT_THAT(futex_wake(IsPrivate(), &a, 1), SyscallSucceedsWithValue(1));
+}
+
+TEST_P(PrivateAndSharedFutexTest, Wait_WakeBitset_NoRandomSave) {
+ constexpr int kInitialValue = 1;
+ std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue);
+
+ DisableSave ds;
+ ScopedThread thread([&] {
+ EXPECT_THAT(futex_wait(IsPrivate(), &a, kInitialValue), SyscallSucceeds());
+ });
+ absl::SleepFor(kWaiterStartupDelay);
+
+ a.fetch_add(1);
+ EXPECT_THAT(futex_wake_bitset(IsPrivate(), &a, 1, 0b01001000),
+ SyscallSucceedsWithValue(1));
+}
+
+TEST_P(PrivateAndSharedFutexTest, WaitBitset_WakeBitsetMatch_NoRandomSave) {
+ constexpr int kInitialValue = 1;
+ std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue);
+
+ constexpr int kBitset = 0b01001000;
+
+ DisableSave ds;
+ ScopedThread thread([&] {
+ EXPECT_THAT(futex_wait_bitset(IsPrivate(), &a, kInitialValue, kBitset),
+ SyscallSucceeds());
+ });
+ absl::SleepFor(kWaiterStartupDelay);
+
+ a.fetch_add(1);
+ EXPECT_THAT(futex_wake_bitset(IsPrivate(), &a, 1, kBitset),
+ SyscallSucceedsWithValue(1));
+}
+
+TEST_P(PrivateAndSharedFutexTest, WaitBitset_WakeBitsetNoMatch_NoRandomSave) {
+ constexpr int kInitialValue = 1;
+ std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue);
+
+ constexpr int kWaitBitset = 0b01000001;
+ constexpr int kWakeBitset = 0b00101000;
+ static_assert((kWaitBitset & kWakeBitset) == 0,
+ "futex_wake_bitset will wake waiter");
+
+ DisableSave ds;
+ ScopedThread thread([&] {
+ EXPECT_THAT(futex_wait_bitset(IsPrivate(), &a, kInitialValue, kWaitBitset,
+ absl::Now() + kIneffectiveWakeTimeout),
+ SyscallFailsWithErrno(ETIMEDOUT));
+ });
+ absl::SleepFor(kWaiterStartupDelay);
+
+ a.fetch_add(1);
+ EXPECT_THAT(futex_wake_bitset(IsPrivate(), &a, 1, kWakeBitset),
+ SyscallSucceedsWithValue(0));
+}
+
+TEST_P(PrivateAndSharedFutexTest, WakeOpCondSuccess_NoRandomSave) {
+ constexpr int kInitialValue = 1;
+ std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue);
+ std::atomic<int> b = ATOMIC_VAR_INIT(kInitialValue);
+
+ DisableSave ds;
+ ScopedThread thread_a([&] {
+ EXPECT_THAT(futex_wait(IsPrivate(), &a, kInitialValue), SyscallSucceeds());
+ });
+ ScopedThread thread_b([&] {
+ EXPECT_THAT(futex_wait(IsPrivate(), &b, kInitialValue), SyscallSucceeds());
+ });
+ absl::SleepFor(kWaiterStartupDelay);
+
+ a.fetch_add(1);
+ b.fetch_add(1);
+ // This futex_wake_op should:
+ // - Wake 1 waiter on a unconditionally.
+ // - Wake 1 waiter on b if b == kInitialValue + 1, which it is.
+ // - Do "b += 1".
+ EXPECT_THAT(futex_wake_op(IsPrivate(), &a, &b, 1, 1,
+ FUTEX_OP(FUTEX_OP_ADD, 1, FUTEX_OP_CMP_EQ,
+ (kInitialValue + 1))),
+ SyscallSucceedsWithValue(2));
+ EXPECT_EQ(b, kInitialValue + 2);
+}
+
+TEST_P(PrivateAndSharedFutexTest, WakeOpCondFailure_NoRandomSave) {
+ constexpr int kInitialValue = 1;
+ std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue);
+ std::atomic<int> b = ATOMIC_VAR_INIT(kInitialValue);
+
+ DisableSave ds;
+ ScopedThread thread_a([&] {
+ EXPECT_THAT(futex_wait(IsPrivate(), &a, kInitialValue), SyscallSucceeds());
+ });
+ ScopedThread thread_b([&] {
+ EXPECT_THAT(
+ futex_wait(IsPrivate(), &b, kInitialValue, kIneffectiveWakeTimeout),
+ SyscallFailsWithErrno(ETIMEDOUT));
+ });
+ absl::SleepFor(kWaiterStartupDelay);
+
+ a.fetch_add(1);
+ b.fetch_add(1);
+ // This futex_wake_op should:
+ // - Wake 1 waiter on a unconditionally.
+ // - Wake 1 waiter on b if b == kInitialValue - 1, which it isn't.
+ // - Do "b += 1".
+ EXPECT_THAT(futex_wake_op(IsPrivate(), &a, &b, 1, 1,
+ FUTEX_OP(FUTEX_OP_ADD, 1, FUTEX_OP_CMP_EQ,
+ (kInitialValue - 1))),
+ SyscallSucceedsWithValue(1));
+ EXPECT_EQ(b, kInitialValue + 2);
+}
+
+TEST_P(PrivateAndSharedFutexTest, NoWakeInterprocessPrivateAnon_NoRandomSave) {
+ auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(
+ MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
+ auto const ptr = static_cast<std::atomic<int>*>(mapping.ptr());
+ constexpr int kInitialValue = 1;
+ ptr->store(kInitialValue);
+
+ DisableSave ds;
+ pid_t const child_pid = fork();
+ if (child_pid == 0) {
+ TEST_PCHECK(futex_wait(IsPrivate(), ptr, kInitialValue,
+ kIneffectiveWakeTimeout) == -1 &&
+ errno == ETIMEDOUT);
+ _exit(0);
+ }
+ ASSERT_THAT(child_pid, SyscallSucceeds());
+ absl::SleepFor(kWaiterStartupDelay);
+
+ EXPECT_THAT(futex_wake(IsPrivate(), ptr, 1), SyscallSucceedsWithValue(0));
+
+ int status;
+ ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0),
+ SyscallSucceedsWithValue(child_pid));
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << " status " << status;
+}
+
+TEST_P(PrivateAndSharedFutexTest, WakeAfterCOWBreak_NoRandomSave) {
+ // Use a futex on a non-stack mapping so we can be sure that the child process
+ // below isn't the one that breaks copy-on-write.
+ auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(
+ MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
+ auto const ptr = static_cast<std::atomic<int>*>(mapping.ptr());
+ constexpr int kInitialValue = 1;
+ ptr->store(kInitialValue);
+
+ DisableSave ds;
+ ScopedThread thread([&] {
+ EXPECT_THAT(futex_wait(IsPrivate(), ptr, kInitialValue), SyscallSucceeds());
+ });
+ absl::SleepFor(kWaiterStartupDelay);
+
+ pid_t const child_pid = fork();
+ if (child_pid == 0) {
+ // Wait to be killed by the parent.
+ while (true) pause();
+ }
+ ASSERT_THAT(child_pid, SyscallSucceeds());
+ auto cleanup_child = Cleanup([&] {
+ EXPECT_THAT(kill(child_pid, SIGKILL), SyscallSucceeds());
+ int status;
+ ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0),
+ SyscallSucceedsWithValue(child_pid));
+ EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
+ << " status " << status;
+ });
+
+ // In addition to preventing a late futex_wait from sleeping, this breaks
+ // copy-on-write on the mapped page.
+ ptr->fetch_add(1);
+ EXPECT_THAT(futex_wake(IsPrivate(), ptr, 1), SyscallSucceedsWithValue(1));
+}
+
+TEST_P(PrivateAndSharedFutexTest, WakeWrongKind_NoRandomSave) {
+ constexpr int kInitialValue = 1;
+ std::atomic<int> a = ATOMIC_VAR_INIT(kInitialValue);
+
+ DisableSave ds;
+ ScopedThread thread([&] {
+ EXPECT_THAT(
+ futex_wait(IsPrivate(), &a, kInitialValue, kIneffectiveWakeTimeout),
+ SyscallFailsWithErrno(ETIMEDOUT));
+ });
+ absl::SleepFor(kWaiterStartupDelay);
+
+ a.fetch_add(1);
+ // The value of priv passed to futex_wake is the opposite of that passed to
+ // the futex_waiter; we expect this not to wake the waiter.
+ EXPECT_THAT(futex_wake(!IsPrivate(), &a, 1), SyscallSucceedsWithValue(0));
+}
+
+INSTANTIATE_TEST_CASE_P(SharedPrivate, PrivateAndSharedFutexTest,
+ ::testing::Bool());
+
+// Passing null as the address only works for private futexes.
+
+TEST(PrivateFutexTest, WakeOp0Set) {
+ std::atomic<int> a = ATOMIC_VAR_INIT(1);
+
+ int futex_op = FUTEX_OP(FUTEX_OP_SET, 2, 0, 0);
+ EXPECT_THAT(futex_wake_op(true, nullptr, &a, 0, 0, futex_op),
+ SyscallSucceedsWithValue(0));
+ EXPECT_EQ(a, 2);
+}
+
+TEST(PrivateFutexTest, WakeOp0Add) {
+ std::atomic<int> a = ATOMIC_VAR_INIT(1);
+ int futex_op = FUTEX_OP(FUTEX_OP_ADD, 1, 0, 0);
+ EXPECT_THAT(futex_wake_op(true, nullptr, &a, 0, 0, futex_op),
+ SyscallSucceedsWithValue(0));
+ EXPECT_EQ(a, 2);
+}
+
+TEST(PrivateFutexTest, WakeOp0Or) {
+ std::atomic<int> a = ATOMIC_VAR_INIT(0b01);
+ int futex_op = FUTEX_OP(FUTEX_OP_OR, 0b10, 0, 0);
+ EXPECT_THAT(futex_wake_op(true, nullptr, &a, 0, 0, futex_op),
+ SyscallSucceedsWithValue(0));
+ EXPECT_EQ(a, 0b11);
+}
+
+TEST(PrivateFutexTest, WakeOp0Andn) {
+ std::atomic<int> a = ATOMIC_VAR_INIT(0b11);
+ int futex_op = FUTEX_OP(FUTEX_OP_ANDN, 0b10, 0, 0);
+ EXPECT_THAT(futex_wake_op(true, nullptr, &a, 0, 0, futex_op),
+ SyscallSucceedsWithValue(0));
+ EXPECT_EQ(a, 0b01);
+}
+
+TEST(PrivateFutexTest, WakeOp0Xor) {
+ std::atomic<int> a = ATOMIC_VAR_INIT(0b1010);
+ int futex_op = FUTEX_OP(FUTEX_OP_XOR, 0b1100, 0, 0);
+ EXPECT_THAT(futex_wake_op(true, nullptr, &a, 0, 0, futex_op),
+ SyscallSucceedsWithValue(0));
+ EXPECT_EQ(a, 0b0110);
+}
+
+TEST(SharedFutexTest, WakeInterprocessSharedAnon_NoRandomSave) {
+ auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(
+ MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED));
+ auto const ptr = static_cast<std::atomic<int>*>(mapping.ptr());
+ constexpr int kInitialValue = 1;
+ ptr->store(kInitialValue);
+
+ DisableSave ds;
+ pid_t const child_pid = fork();
+ if (child_pid == 0) {
+ TEST_PCHECK(futex_wait(false, ptr, kInitialValue) == 0);
+ _exit(0);
+ }
+ ASSERT_THAT(child_pid, SyscallSucceeds());
+ auto kill_child = Cleanup(
+ [&] { EXPECT_THAT(kill(child_pid, SIGKILL), SyscallSucceeds()); });
+ absl::SleepFor(kWaiterStartupDelay);
+
+ ptr->fetch_add(1);
+ // This is an ASSERT so that if it fails, we immediately abort the test (and
+ // kill the subprocess).
+ ASSERT_THAT(futex_wake(false, ptr, 1), SyscallSucceedsWithValue(1));
+
+ kill_child.Release();
+ int status;
+ ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0),
+ SyscallSucceedsWithValue(child_pid));
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << " status " << status;
+}
+
+TEST(SharedFutexTest, WakeInterprocessFile_NoRandomSave) {
+ auto const file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ ASSERT_THAT(truncate(file.path().c_str(), kPageSize), SyscallSucceeds());
+ auto const fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR));
+ auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(Mmap(
+ nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd.get(), 0));
+ auto const ptr = static_cast<std::atomic<int>*>(mapping.ptr());
+ constexpr int kInitialValue = 1;
+ ptr->store(kInitialValue);
+
+ DisableSave ds;
+ pid_t const child_pid = fork();
+ if (child_pid == 0) {
+ TEST_PCHECK(futex_wait(false, ptr, kInitialValue) == 0);
+ _exit(0);
+ }
+ ASSERT_THAT(child_pid, SyscallSucceeds());
+ auto kill_child = Cleanup(
+ [&] { EXPECT_THAT(kill(child_pid, SIGKILL), SyscallSucceeds()); });
+ absl::SleepFor(kWaiterStartupDelay);
+
+ ptr->fetch_add(1);
+ // This is an ASSERT so that if it fails, we immediately abort the test (and
+ // kill the subprocess).
+ ASSERT_THAT(futex_wake(false, ptr, 1), SyscallSucceedsWithValue(1));
+
+ kill_child.Release();
+ int status;
+ ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0),
+ SyscallSucceedsWithValue(child_pid));
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << " status " << status;
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/getcpu.cc b/test/syscalls/linux/getcpu.cc
new file mode 100644
index 000000000..3a52b25fa
--- /dev/null
+++ b/test/syscalls/linux/getcpu.cc
@@ -0,0 +1,40 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sched.h>
+
+#include "gtest/gtest.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(GetcpuTest, IsValidCpuStress) {
+ const int num_cpus = NumCPUs();
+ absl::Time deadline = absl::Now() + absl::Seconds(10);
+ while (absl::Now() < deadline) {
+ int cpu;
+ ASSERT_THAT(cpu = sched_getcpu(), SyscallSucceeds());
+ ASSERT_LT(cpu, num_cpus);
+ }
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/getdents.cc b/test/syscalls/linux/getdents.cc
new file mode 100644
index 000000000..5db580aa0
--- /dev/null
+++ b/test/syscalls/linux/getdents.cc
@@ -0,0 +1,485 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/eventfd.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <syscall.h>
+#include <unistd.h>
+#include <map>
+#include <string>
+#include <unordered_set>
+#include <utility>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/strings/numbers.h"
+#include "absl/strings/str_cat.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+using ::testing::IsEmpty;
+using ::testing::IsSupersetOf;
+using ::testing::Not;
+using ::testing::NotNull;
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// New Linux dirent format.
+struct linux_dirent64 {
+ uint64_t d_ino; // Inode number
+ int64_t d_off; // Offset to next linux_dirent64
+ unsigned short d_reclen; // NOLINT, Length of this linux_dirent64
+ unsigned char d_type; // NOLINT, File type
+ char d_name[0]; // Filename (null-terminated)
+};
+
+// Old Linux dirent format.
+struct linux_dirent {
+ unsigned long d_ino; // NOLINT
+ unsigned long d_off; // NOLINT
+ unsigned short d_reclen; // NOLINT
+ char d_name[0];
+};
+
+// Wraps a buffer to provide a set of dirents.
+// T is the underlying dirent type.
+template <typename T>
+class DirentBuffer {
+ public:
+ // DirentBuffer manages the buffer.
+ explicit DirentBuffer(size_t size)
+ : managed_(true), actual_size_(size), reported_size_(size) {
+ data_ = new char[actual_size_];
+ }
+
+ // The buffer is managed externally.
+ DirentBuffer(char* data, size_t actual_size, size_t reported_size)
+ : managed_(false),
+ data_(data),
+ actual_size_(actual_size),
+ reported_size_(reported_size) {}
+
+ ~DirentBuffer() {
+ if (managed_) {
+ delete[] data_;
+ }
+ }
+
+ T* Data() { return reinterpret_cast<T*>(data_); }
+
+ T* Start(size_t read) {
+ read_ = read;
+ if (read_) {
+ return Data();
+ } else {
+ return nullptr;
+ }
+ }
+
+ T* Current() { return reinterpret_cast<T*>(&data_[off_]); }
+
+ T* Next() {
+ size_t new_off = off_ + Current()->d_reclen;
+ if (new_off >= read_ || new_off >= actual_size_) {
+ return nullptr;
+ }
+
+ off_ = new_off;
+ return Current();
+ }
+
+ size_t Size() { return reported_size_; }
+
+ void Reset() {
+ off_ = 0;
+ read_ = 0;
+ memset(data_, 0, actual_size_);
+ }
+
+ private:
+ bool managed_;
+ char* data_;
+ size_t actual_size_;
+ size_t reported_size_;
+
+ size_t off_ = 0;
+
+ size_t read_ = 0;
+};
+
+// Test for getdents/getdents64.
+// T is the Linux dirent type.
+template <typename T>
+class GetdentsTest : public ::testing::Test {
+ public:
+ using LinuxDirentType = T;
+ using DirentBufferType = DirentBuffer<T>;
+
+ protected:
+ void SetUp() override {
+ dir_ = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ fd_ = ASSERT_NO_ERRNO_AND_VALUE(Open(dir_.path(), O_RDONLY | O_DIRECTORY));
+ }
+
+ // Must be overridden with explicit specialization. See below.
+ int SyscallNum();
+
+ int Getdents(LinuxDirentType* dirp, unsigned int count) {
+ return RetryEINTR(syscall)(SyscallNum(), fd_.get(), dirp, count);
+ }
+
+ // Fill directory with num files, named by number starting at 0.
+ void FillDirectory(size_t num) {
+ for (size_t i = 0; i < num; i++) {
+ auto name = JoinPath(dir_.path(), absl::StrCat(i));
+ TEST_CHECK(CreateWithContents(name, "").ok());
+ }
+ }
+
+ // Fill directory with a given list of filenames.
+ void FillDirectoryWithFiles(const std::vector<std::string>& filenames) {
+ for (const auto& filename : filenames) {
+ auto name = JoinPath(dir_.path(), filename);
+ TEST_CHECK(CreateWithContents(name, "").ok());
+ }
+ }
+
+ // Seek to the start of the directory.
+ PosixError SeekStart() {
+ constexpr off_t kStartOfFile = 0;
+ off_t offset = lseek(fd_.get(), kStartOfFile, SEEK_SET);
+ if (offset < 0) {
+ return PosixError(errno, absl::StrCat("error seeking to ", kStartOfFile));
+ }
+ if (offset != kStartOfFile) {
+ return PosixError(EINVAL, absl::StrCat("tried to seek to ", kStartOfFile,
+ " but got ", offset));
+ }
+ return NoError();
+ }
+
+ // Call getdents multiple times, reading all dirents and calling f on each.
+ // f has the type signature PosixError f(T*).
+ // If f returns a non-OK error, so does ReadDirents.
+ template <typename F>
+ PosixError ReadDirents(DirentBufferType* dirents, F const& f) {
+ int n;
+ do {
+ dirents->Reset();
+
+ n = Getdents(dirents->Data(), dirents->Size());
+ MaybeSave();
+ if (n < 0) {
+ return PosixError(errno, "getdents");
+ }
+
+ for (auto d = dirents->Start(n); d; d = dirents->Next()) {
+ RETURN_IF_ERRNO(f(d));
+ }
+ } while (n > 0);
+
+ return NoError();
+ }
+
+ // Call Getdents successively and count all entries.
+ int ReadAndCountAllEntries(DirentBufferType* dirents) {
+ int found = 0;
+
+ EXPECT_NO_ERRNO(ReadDirents(dirents, [&](LinuxDirentType* d) {
+ found++;
+ return NoError();
+ }));
+
+ return found;
+ }
+
+ private:
+ TempPath dir_;
+ FileDescriptor fd_;
+};
+
+// GUnit TYPED_TEST_CASE does not allow multiple template parameters, so we
+// must use explicit template specialization to set the syscall number.
+template <>
+int GetdentsTest<struct linux_dirent>::SyscallNum() {
+ return SYS_getdents;
+}
+
+template <>
+int GetdentsTest<struct linux_dirent64>::SyscallNum() {
+ return SYS_getdents64;
+}
+
+// Test both legacy getdents and getdents64.
+typedef ::testing::Types<struct linux_dirent, struct linux_dirent64>
+ GetdentsTypes;
+TYPED_TEST_CASE(GetdentsTest, GetdentsTypes);
+
+// N.B. TYPED_TESTs require explicitly using this-> to access members of
+// GetdentsTest, since we are inside of a derived class template.
+
+TYPED_TEST(GetdentsTest, VerifyEntries) {
+ typename TestFixture::DirentBufferType dirents(1024);
+
+ this->FillDirectory(2);
+
+ // Map of all the entries we expect to find.
+ std::map<std::string, bool> found;
+ found["."] = false;
+ found[".."] = false;
+ found["0"] = false;
+ found["1"] = false;
+
+ EXPECT_NO_ERRNO(this->ReadDirents(
+ &dirents, [&](typename TestFixture::LinuxDirentType* d) {
+ auto kv = found.find(d->d_name);
+ EXPECT_NE(kv, found.end()) << "Unexpected file: " << d->d_name;
+ if (kv != found.end()) {
+ EXPECT_FALSE(kv->second);
+ }
+ found[d->d_name] = true;
+ return NoError();
+ }));
+
+ for (auto& kv : found) {
+ EXPECT_TRUE(kv.second) << "File not found: " << kv.first;
+ }
+}
+
+TYPED_TEST(GetdentsTest, VerifyPadding) {
+ typename TestFixture::DirentBufferType dirents(1024);
+
+ // Create files with names of length 1 through 16.
+ std::vector<std::string> files;
+ std::string filename;
+ for (int i = 0; i < 16; ++i) {
+ absl::StrAppend(&filename, "a");
+ files.push_back(filename);
+ }
+ this->FillDirectoryWithFiles(files);
+
+ // We expect to find all the files, plus '.' and '..'.
+ const int expect_found = 2 + files.size();
+ int found = 0;
+
+ EXPECT_NO_ERRNO(this->ReadDirents(
+ &dirents, [&](typename TestFixture::LinuxDirentType* d) {
+ EXPECT_EQ(d->d_reclen % 8, 0)
+ << "Dirent " << d->d_name
+ << " had reclen that was not byte aligned: " << d->d_name;
+ found++;
+ return NoError();
+ }));
+
+ // Make sure we found all the files.
+ EXPECT_EQ(found, expect_found);
+}
+
+// For a small directory, the provided buffer should be large enough
+// for all entries.
+TYPED_TEST(GetdentsTest, SmallDir) {
+ // . and .. should be in an otherwise empty directory.
+ int expect = 2;
+
+ // Add some actual files.
+ this->FillDirectory(2);
+ expect += 2;
+
+ typename TestFixture::DirentBufferType dirents(256);
+
+ EXPECT_EQ(expect, this->ReadAndCountAllEntries(&dirents));
+}
+
+// A directory with lots of files requires calling getdents multiple times.
+TYPED_TEST(GetdentsTest, LargeDir) {
+ // . and .. should be in an otherwise empty directory.
+ int expect = 2;
+
+ // Add some actual files.
+ this->FillDirectory(100);
+ expect += 100;
+
+ typename TestFixture::DirentBufferType dirents(256);
+
+ EXPECT_EQ(expect, this->ReadAndCountAllEntries(&dirents));
+}
+
+// If we lie about the size of the buffer, we should still be able to read the
+// entries with the available space.
+TYPED_TEST(GetdentsTest, PartialBuffer) {
+ // . and .. should be in an otherwise empty directory.
+ int expect = 2;
+
+ // Add some actual files.
+ this->FillDirectory(100);
+ expect += 100;
+
+ void* addr = mmap(0, 2 * kPageSize, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ ASSERT_NE(addr, MAP_FAILED);
+
+ char* buf = reinterpret_cast<char*>(addr);
+
+ // Guard page
+ EXPECT_THAT(
+ mprotect(reinterpret_cast<void*>(buf + kPageSize), kPageSize, PROT_NONE),
+ SyscallSucceeds());
+
+ // Limit space in buf to 256 bytes.
+ buf += kPageSize - 256;
+
+ // Lie about the buffer. Even though we claim the buffer is 1 page,
+ // we should still get all of the dirents in the first 256 bytes.
+ typename TestFixture::DirentBufferType dirents(buf, 256, kPageSize);
+
+ EXPECT_EQ(expect, this->ReadAndCountAllEntries(&dirents));
+
+ EXPECT_THAT(munmap(addr, 2 * kPageSize), SyscallSucceeds());
+}
+
+// Open many file descriptors, then scan through /proc/self/fd to find and close
+// them all. (The latter is commonly used to handle races betweek fork/execve
+// and the creation of unwanted non-O_CLOEXEC file descriptors.) This tests that
+// getdents iterates correctly despite mutation of /proc/self/fd.
+TYPED_TEST(GetdentsTest, ProcSelfFd) {
+ constexpr size_t kNfds = 10;
+ std::unordered_set<int> fds;
+ std::vector<FileDescriptor> fd_closers;
+ fd_closers.reserve(fds.size());
+ for (int fd : fds) {
+ fd_closers.emplace_back(fd);
+ }
+ for (size_t i = 0; i < kNfds; i++) {
+ int fd;
+ ASSERT_THAT(fd = eventfd(0, 0), SyscallSucceeds());
+ fds.insert(fd);
+ }
+
+ const FileDescriptor proc_self_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/fd", O_RDONLY | O_DIRECTORY));
+
+ // Make the buffer very small since we want to iterate.
+ typename TestFixture::DirentBufferType dirents(
+ 2 * sizeof(typename TestFixture::LinuxDirentType));
+ std::unordered_set<int> prev_fds;
+ while (true) {
+ dirents.Reset();
+ int rv;
+ ASSERT_THAT(rv = RetryEINTR(syscall)(this->SyscallNum(), proc_self_fd.get(),
+ dirents.Data(), dirents.Size()),
+ SyscallSucceeds());
+ if (rv == 0) {
+ break;
+ }
+ for (auto* d = dirents.Start(rv); d; d = dirents.Next()) {
+ int dfd;
+ if (!absl::SimpleAtoi(d->d_name, &dfd)) continue;
+ EXPECT_TRUE(prev_fds.insert(dfd).second)
+ << "Repeated observation of /proc/self/fd/" << dfd;
+ auto it = fds.find(dfd);
+ if (it != fds.end()) {
+ fds.erase(it);
+ EXPECT_THAT(close(dfd), SyscallSucceeds());
+ }
+ }
+ }
+
+ // Check that we closed every fd.
+ EXPECT_THAT(fds, ::testing::IsEmpty());
+}
+
+// Test that getdents returns ENOTDIR when called on a file.
+TYPED_TEST(GetdentsTest, NotDir) {
+ auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY));
+
+ typename TestFixture::DirentBufferType dirents(256);
+ EXPECT_THAT(RetryEINTR(syscall)(this->SyscallNum(), fd.get(), dirents.Data(),
+ dirents.Size()),
+ SyscallFailsWithErrno(ENOTDIR));
+}
+
+// Test that SEEK_SET to 0 causes getdents to re-read the entries.
+TYPED_TEST(GetdentsTest, SeekResetsCursor) {
+ // . and .. should be in an otherwise empty directory.
+ int expect = 2;
+
+ // Add some files to the directory.
+ this->FillDirectory(10);
+ expect += 10;
+
+ typename TestFixture::DirentBufferType dirents(256);
+
+ // We should get all the expected entries.
+ EXPECT_EQ(expect, this->ReadAndCountAllEntries(&dirents));
+
+ // Seek back to 0.
+ ASSERT_NO_ERRNO(this->SeekStart());
+
+ // We should get all the expected entries again.
+ EXPECT_EQ(expect, this->ReadAndCountAllEntries(&dirents));
+}
+
+// Some tests using the glibc readdir interface.
+TEST(ReaddirTest, OpenDir) {
+ DIR* dev;
+ ASSERT_THAT(dev = opendir("/dev"), NotNull());
+ EXPECT_THAT(closedir(dev), SyscallSucceeds());
+}
+
+TEST(ReaddirTest, RootContainsBasicDirectories) {
+ EXPECT_THAT(ListDir("/", true),
+ IsPosixErrorOkAndHolds(IsSupersetOf(
+ {"bin", "dev", "etc", "lib", "proc", "sbin", "usr"})));
+}
+
+TEST(ReaddirTest, Bug24096713Dev) {
+ auto contents = ASSERT_NO_ERRNO_AND_VALUE(ListDir("/dev", true));
+ EXPECT_THAT(contents, Not(IsEmpty()));
+}
+
+TEST(ReaddirTest, Bug24096713ProcTid) {
+ auto contents = ASSERT_NO_ERRNO_AND_VALUE(
+ ListDir(absl::StrCat("/proc/", syscall(SYS_gettid), "/"), true));
+ EXPECT_THAT(contents, Not(IsEmpty()));
+}
+
+TEST(ReaddirTest, Bug33429925Proc) {
+ auto contents = ASSERT_NO_ERRNO_AND_VALUE(ListDir("/proc", true));
+ EXPECT_THAT(contents, Not(IsEmpty()));
+}
+
+TEST(ReaddirTest, Bug35110122Root) {
+ auto contents = ASSERT_NO_ERRNO_AND_VALUE(ListDir("/", true));
+ EXPECT_THAT(contents, Not(IsEmpty()));
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/getrandom.cc b/test/syscalls/linux/getrandom.cc
new file mode 100644
index 000000000..be5325497
--- /dev/null
+++ b/test/syscalls/linux/getrandom.cc
@@ -0,0 +1,61 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+#ifndef SYS_getrandom
+#if defined(__x86_64__)
+#define SYS_getrandom 318
+#elif defined(__i386__)
+#define SYS_getrandom 355
+#else
+#error "Unknown architecture"
+#endif
+#endif // SYS_getrandom
+
+bool SomeByteIsNonZero(char* random_bytes, int length) {
+ for (int i = 0; i < length; i++) {
+ if (random_bytes[i] != 0) {
+ return true;
+ }
+ }
+ return false;
+}
+
+TEST(GetrandomTest, IsRandom) {
+ // This test calls get_random and makes sure that the array is filled in with
+ // something that is non-zero. Perhaps we get back \x00\x00\x00\x00\x00.... as
+ // a random result, but it's so unlikely that we'll just ignore this.
+ char random_bytes[64] = {};
+ int n = syscall(SYS_getrandom, random_bytes, 64, 0);
+ SKIP_IF(!IsRunningOnGvisor() && n < 0 && errno == ENOSYS);
+ EXPECT_THAT(n, SyscallSucceeds());
+ EXPECT_GT(n, 0); // Some bytes should be returned.
+ EXPECT_TRUE(SomeByteIsNonZero(random_bytes, n));
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/getrusage.cc b/test/syscalls/linux/getrusage.cc
new file mode 100644
index 000000000..1ae603858
--- /dev/null
+++ b/test/syscalls/linux/getrusage.cc
@@ -0,0 +1,177 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <signal.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include "gtest/gtest.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/logging.h"
+#include "test/util/memory_util.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(GetrusageTest, BasicFork) {
+ pid_t pid = fork();
+ if (pid == 0) {
+ struct rusage rusage_self;
+ TEST_PCHECK(getrusage(RUSAGE_SELF, &rusage_self) == 0);
+ struct rusage rusage_children;
+ TEST_PCHECK(getrusage(RUSAGE_CHILDREN, &rusage_children) == 0);
+ // The child has consumed some memory.
+ TEST_CHECK(rusage_self.ru_maxrss != 0);
+ // The child has no children of its own.
+ TEST_CHECK(rusage_children.ru_maxrss == 0);
+ _exit(0);
+ }
+ ASSERT_THAT(pid, SyscallSucceeds());
+ int status;
+ ASSERT_THAT(RetryEINTR(waitpid)(pid, &status, 0), SyscallSucceeds());
+ struct rusage rusage_self;
+ ASSERT_THAT(getrusage(RUSAGE_SELF, &rusage_self), SyscallSucceeds());
+ struct rusage rusage_children;
+ ASSERT_THAT(getrusage(RUSAGE_CHILDREN, &rusage_children), SyscallSucceeds());
+ // The parent has consumed some memory.
+ EXPECT_GT(rusage_self.ru_maxrss, 0);
+ // The child has consumed some memory, and because it has exited we can get
+ // its max RSS.
+ EXPECT_GT(rusage_children.ru_maxrss, 0);
+}
+
+// Verifies that a process can get the max resident set size of its grandchild,
+// i.e. that maxrss propagates correctly from children to waiting parents.
+TEST(GetrusageTest, Grandchild) {
+ constexpr int kGrandchildSizeKb = 1024;
+ pid_t pid = fork();
+ if (pid == 0) {
+ pid = fork();
+ if (pid == 0) {
+ int flags = MAP_ANONYMOUS | MAP_POPULATE | MAP_PRIVATE;
+ void *addr =
+ mmap(nullptr, kGrandchildSizeKb * 1024, PROT_WRITE, flags, -1, 0);
+ TEST_PCHECK(addr != MAP_FAILED);
+ } else {
+ int status;
+ TEST_PCHECK(RetryEINTR(waitpid)(pid, &status, 0) == pid);
+ }
+ _exit(0);
+ }
+ ASSERT_THAT(pid, SyscallSucceeds());
+ int status;
+ ASSERT_THAT(RetryEINTR(waitpid)(pid, &status, 0), SyscallSucceeds());
+ struct rusage rusage_self;
+ ASSERT_THAT(getrusage(RUSAGE_SELF, &rusage_self), SyscallSucceeds());
+ struct rusage rusage_children;
+ ASSERT_THAT(getrusage(RUSAGE_CHILDREN, &rusage_children), SyscallSucceeds());
+ // The parent has consumed some memory.
+ EXPECT_GT(rusage_self.ru_maxrss, 0);
+ // The child should consume next to no memory, but the grandchild will
+ // consume at least 1MB. Verify that usage bubbles up to the grandparent.
+ EXPECT_GT(rusage_children.ru_maxrss, kGrandchildSizeKb);
+}
+
+// Verifies that processes ignoring SIGCHLD do not have updated child maxrss
+// updated.
+TEST(GetrusageTest, IgnoreSIGCHLD) {
+ struct sigaction sa;
+ sa.sa_handler = SIG_IGN;
+ sa.sa_flags = 0;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGCHLD, sa));
+ pid_t pid = fork();
+ if (pid == 0) {
+ struct rusage rusage_self;
+ TEST_PCHECK(getrusage(RUSAGE_SELF, &rusage_self) == 0);
+ // The child has consumed some memory.
+ TEST_CHECK(rusage_self.ru_maxrss != 0);
+ _exit(0);
+ }
+ ASSERT_THAT(pid, SyscallSucceeds());
+ int status;
+ ASSERT_THAT(RetryEINTR(waitpid)(pid, &status, 0),
+ SyscallFailsWithErrno(ECHILD));
+ struct rusage rusage_self;
+ ASSERT_THAT(getrusage(RUSAGE_SELF, &rusage_self), SyscallSucceeds());
+ struct rusage rusage_children;
+ ASSERT_THAT(getrusage(RUSAGE_CHILDREN, &rusage_children), SyscallSucceeds());
+ // The parent has consumed some memory.
+ EXPECT_GT(rusage_self.ru_maxrss, 0);
+ // The child's maxrss should not have propagated up.
+ EXPECT_EQ(rusage_children.ru_maxrss, 0);
+}
+
+// Verifies that zombie processes do not update their parent's maxrss. Only
+// reaped processes should do this.
+TEST(GetrusageTest, IgnoreZombie) {
+ pid_t pid = fork();
+ if (pid == 0) {
+ struct rusage rusage_self;
+ TEST_PCHECK(getrusage(RUSAGE_SELF, &rusage_self) == 0);
+ struct rusage rusage_children;
+ TEST_PCHECK(getrusage(RUSAGE_CHILDREN, &rusage_children) == 0);
+ // The child has consumed some memory.
+ TEST_CHECK(rusage_self.ru_maxrss != 0);
+ // The child has no children of its own.
+ TEST_CHECK(rusage_children.ru_maxrss == 0);
+ _exit(0);
+ }
+ ASSERT_THAT(pid, SyscallSucceeds());
+ // Give the child time to exit. Because we don't call wait, the child should
+ // remain a zombie.
+ absl::SleepFor(absl::Seconds(5));
+ struct rusage rusage_self;
+ ASSERT_THAT(getrusage(RUSAGE_SELF, &rusage_self), SyscallSucceeds());
+ struct rusage rusage_children;
+ ASSERT_THAT(getrusage(RUSAGE_CHILDREN, &rusage_children), SyscallSucceeds());
+ // The parent has consumed some memory.
+ EXPECT_GT(rusage_self.ru_maxrss, 0);
+ // The child has consumed some memory, but hasn't been reaped.
+ EXPECT_EQ(rusage_children.ru_maxrss, 0);
+}
+
+TEST(GetrusageTest, Wait4) {
+ pid_t pid = fork();
+ if (pid == 0) {
+ struct rusage rusage_self;
+ TEST_PCHECK(getrusage(RUSAGE_SELF, &rusage_self) == 0);
+ struct rusage rusage_children;
+ TEST_PCHECK(getrusage(RUSAGE_CHILDREN, &rusage_children) == 0);
+ // The child has consumed some memory.
+ TEST_CHECK(rusage_self.ru_maxrss != 0);
+ // The child has no children of its own.
+ TEST_CHECK(rusage_children.ru_maxrss == 0);
+ _exit(0);
+ }
+ ASSERT_THAT(pid, SyscallSucceeds());
+ struct rusage rusage_children;
+ int status;
+ ASSERT_THAT(RetryEINTR(wait4)(pid, &status, 0, &rusage_children),
+ SyscallSucceeds());
+ // The child has consumed some memory, and because it has exited we can get
+ // its max RSS.
+ EXPECT_GT(rusage_children.ru_maxrss, 0);
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/inotify.cc b/test/syscalls/linux/inotify.cc
new file mode 100644
index 000000000..62fc55c72
--- /dev/null
+++ b/test/syscalls/linux/inotify.cc
@@ -0,0 +1,1489 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <libgen.h>
+#include <sys/inotify.h>
+#include <sys/ioctl.h>
+
+#include <list>
+#include <string>
+#include <vector>
+
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_format.h"
+#include "absl/strings/str_join.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+using ::absl::StreamFormat;
+using ::absl::StrFormat;
+
+constexpr int kBufSize = 1024;
+
+// C++-friendly version of struct inotify_event.
+struct Event {
+ int32_t wd;
+ uint32_t mask;
+ uint32_t cookie;
+ uint32_t len;
+ std::string name;
+
+ Event(uint32_t mask, int32_t wd, absl::string_view name, uint32_t cookie)
+ : wd(wd),
+ mask(mask),
+ cookie(cookie),
+ len(name.size()),
+ name(std::string(name)) {}
+ Event(uint32_t mask, int32_t wd, absl::string_view name)
+ : Event(mask, wd, name, 0) {}
+ Event(uint32_t mask, int32_t wd) : Event(mask, wd, "", 0) {}
+ Event() : Event(0, 0, "", 0) {}
+};
+
+// Prints the symbolic name for a struct inotify_event's 'mask' field.
+std::string FlagString(uint32_t flags) {
+ std::vector<std::string> names;
+
+#define EMIT(target) \
+ if (flags & target) { \
+ names.push_back(#target); \
+ flags &= ~target; \
+ }
+
+ EMIT(IN_ACCESS);
+ EMIT(IN_ATTRIB);
+ EMIT(IN_CLOSE_WRITE);
+ EMIT(IN_CLOSE_NOWRITE);
+ EMIT(IN_CREATE);
+ EMIT(IN_DELETE);
+ EMIT(IN_DELETE_SELF);
+ EMIT(IN_MODIFY);
+ EMIT(IN_MOVE_SELF);
+ EMIT(IN_MOVED_FROM);
+ EMIT(IN_MOVED_TO);
+ EMIT(IN_OPEN);
+
+ EMIT(IN_DONT_FOLLOW);
+ EMIT(IN_EXCL_UNLINK);
+ EMIT(IN_ONESHOT);
+ EMIT(IN_ONLYDIR);
+
+ EMIT(IN_IGNORED);
+ EMIT(IN_ISDIR);
+ EMIT(IN_Q_OVERFLOW);
+ EMIT(IN_UNMOUNT);
+
+#undef EMIT
+
+ // If we have anything left over at the end, print it as a hex value.
+ if (flags) {
+ names.push_back(absl::StrCat("0x", absl::Hex(flags)));
+ }
+
+ return absl::StrJoin(names, "|");
+}
+
+std::string DumpEvent(const Event& event) {
+ return StrFormat(
+ "%s, wd=%d%s%s", FlagString(event.mask), event.wd,
+ (event.len > 0) ? StrFormat(", name=%s", event.name) : "",
+ (event.cookie > 0) ? StrFormat(", cookie=%ud", event.cookie) : "");
+}
+
+std::string DumpEvents(const std::vector<Event>& events, int indent_level) {
+ std::stringstream ss;
+ ss << StreamFormat("%d event%s:\n", events.size(),
+ (events.size() > 1) ? "s" : "");
+ int i = 0;
+ for (const Event& ev : events) {
+ ss << StreamFormat("%sevents[%d]: %s\n", std::string(indent_level, '\t'), i++,
+ DumpEvent(ev));
+ }
+ return ss.str();
+}
+
+// A matcher which takes an expected list of events to match against another
+// list of inotify events, in order. This is similar to the ElementsAre matcher,
+// but displays more informative messages on mismatch.
+class EventsAreMatcher
+ : public ::testing::MatcherInterface<std::vector<Event>> {
+ public:
+ explicit EventsAreMatcher(std::vector<Event> references)
+ : references_(std::move(references)) {}
+
+ bool MatchAndExplain(
+ std::vector<Event> events,
+ ::testing::MatchResultListener* const listener) const override {
+ if (references_.size() != events.size()) {
+ *listener << StreamFormat("\n\tCount mismatch, got %s",
+ DumpEvents(events, 2));
+ return false;
+ }
+
+ bool success = true;
+ for (unsigned int i = 0; i < references_.size(); ++i) {
+ const Event& reference = references_[i];
+ const Event& target = events[i];
+
+ if (target.mask != reference.mask || target.wd != reference.wd ||
+ target.name != reference.name || target.cookie != reference.cookie) {
+ *listener << StreamFormat("\n\tMismatch at index %d, want %s, got %s,",
+ i, DumpEvent(reference), DumpEvent(target));
+ success = false;
+ }
+ }
+
+ if (!success) {
+ *listener << StreamFormat("\n\tIn total of %s", DumpEvents(events, 2));
+ }
+ return success;
+ }
+
+ void DescribeTo(::std::ostream* const os) const override {
+ *os << StreamFormat("%s", DumpEvents(references_, 1));
+ }
+
+ void DescribeNegationTo(::std::ostream* const os) const override {
+ *os << StreamFormat("mismatch from %s", DumpEvents(references_, 1));
+ }
+
+ private:
+ std::vector<Event> references_;
+};
+
+::testing::Matcher<std::vector<Event>> Are(std::vector<Event> events) {
+ return MakeMatcher(new EventsAreMatcher(std::move(events)));
+}
+
+// Similar to the EventsAre matcher, but the order of events are ignored.
+class UnorderedEventsAreMatcher
+ : public ::testing::MatcherInterface<std::vector<Event>> {
+ public:
+ explicit UnorderedEventsAreMatcher(std::vector<Event> references)
+ : references_(std::move(references)) {}
+
+ bool MatchAndExplain(
+ std::vector<Event> events,
+ ::testing::MatchResultListener* const listener) const override {
+ if (references_.size() != events.size()) {
+ *listener << StreamFormat("\n\tCount mismatch, got %s",
+ DumpEvents(events, 2));
+ return false;
+ }
+
+ std::vector<Event> unmatched(references_);
+
+ for (const Event& candidate : events) {
+ for (auto it = unmatched.begin(); it != unmatched.end();) {
+ const Event& reference = *it;
+ if (candidate.mask == reference.mask && candidate.wd == reference.wd &&
+ candidate.name == reference.name &&
+ candidate.cookie == reference.cookie) {
+ it = unmatched.erase(it);
+ break;
+ } else {
+ ++it;
+ }
+ }
+ }
+
+ // Anything left unmatched? If so, the matcher fails.
+ if (!unmatched.empty()) {
+ *listener << StreamFormat("\n\tFailed to match %s",
+ DumpEvents(unmatched, 2));
+ *listener << StreamFormat("\n\tIn total of %s", DumpEvents(events, 2));
+ return false;
+ }
+
+ return true;
+ }
+
+ void DescribeTo(::std::ostream* const os) const override {
+ *os << StreamFormat("unordered %s", DumpEvents(references_, 1));
+ }
+
+ void DescribeNegationTo(::std::ostream* const os) const override {
+ *os << StreamFormat("mismatch from unordered %s",
+ DumpEvents(references_, 1));
+ }
+
+ private:
+ std::vector<Event> references_;
+};
+
+::testing::Matcher<std::vector<Event>> AreUnordered(std::vector<Event> events) {
+ return MakeMatcher(new UnorderedEventsAreMatcher(std::move(events)));
+}
+
+// Reads events from an inotify fd until either EOF, or read returns EAGAIN.
+PosixErrorOr<std::vector<Event>> DrainEvents(int fd) {
+ std::vector<Event> events;
+ while (true) {
+ int events_size = 0;
+ if (ioctl(fd, FIONREAD, &events_size) < 0) {
+ return PosixError(errno, "ioctl(FIONREAD) failed on inotify fd");
+ }
+ // Deliberately use a buffer that is larger than necessary, expecting to
+ // only read events_size bytes.
+ std::vector<char> buf(events_size + kBufSize, 0);
+ const ssize_t readlen = read(fd, buf.data(), buf.size());
+ MaybeSave();
+ // Read error?
+ if (readlen < 0) {
+ if (errno == EAGAIN) {
+ // If EAGAIN, no more events at the moment. Return what we have so far.
+ return events;
+ }
+ // Some other read error. Return an error. Right now if we encounter this
+ // after already reading some events, they get lost. However, we don't
+ // expect to see any error, and the calling test will fail immediately if
+ // we signal an error anyways, so this is acceptable.
+ return PosixError(errno, "read() failed on inotify fd");
+ }
+ if (readlen < static_cast<int>(sizeof(struct inotify_event))) {
+ // Impossibly short read.
+ return PosixError(
+ EIO,
+ "read() didn't return enough data represent even a single event");
+ }
+ if (readlen != events_size) {
+ return PosixError(EINVAL, absl::StrCat("read ", readlen,
+ " bytes, expected ", events_size));
+ }
+ if (readlen == 0) {
+ // EOF.
+ return events;
+ }
+
+ // Normal read.
+ const char* cursor = buf.data();
+ while (cursor < (buf.data() + readlen)) {
+ struct inotify_event event = {};
+ memcpy(&event, cursor, sizeof(struct inotify_event));
+
+ Event ev;
+ ev.wd = event.wd;
+ ev.mask = event.mask;
+ ev.cookie = event.cookie;
+ ev.len = event.len;
+ if (event.len > 0) {
+ TEST_CHECK(static_cast<int>(sizeof(struct inotify_event) + event.len) <=
+ readlen);
+ ev.name =
+ std::string(cursor + offsetof(struct inotify_event, name)); // NOLINT
+ // Name field should always be smaller than event.len, otherwise we have
+ // a buffer overflow. The two sizes aren't equal because the std::string
+ // constructor will stop at the first null byte, while event.name may be
+ // padded up to event.len using multiple null bytes.
+ TEST_CHECK(ev.name.size() <= event.len);
+ }
+
+ events.push_back(ev);
+ cursor += sizeof(struct inotify_event) + event.len;
+ }
+ }
+}
+
+PosixErrorOr<FileDescriptor> InotifyInit1(int flags) {
+ int fd;
+ EXPECT_THAT(fd = inotify_init1(flags), SyscallSucceeds());
+ if (fd < 0) {
+ return PosixError(errno, "inotify_init1() failed");
+ }
+ return FileDescriptor(fd);
+}
+
+PosixErrorOr<int> InotifyAddWatch(int fd, const std::string& path, uint32_t mask) {
+ int wd;
+ EXPECT_THAT(wd = inotify_add_watch(fd, path.c_str(), mask),
+ SyscallSucceeds());
+ if (wd < 0) {
+ return PosixError(errno, "inotify_add_watch() failed");
+ }
+ return wd;
+}
+
+TEST(Inotify, InotifyFdNotWritable) {
+ const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(0));
+ EXPECT_THAT(write(fd.get(), "x", 1), SyscallFailsWithErrno(EBADF));
+}
+
+TEST(Inotify, NonBlockingReadReturnsEagain) {
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+ std::vector<char> buf(kBufSize, 0);
+
+ // The read below should return fail with EAGAIN because there is no data to
+ // read and we've specified IN_NONBLOCK. We're guaranteed that there is no
+ // data to read because we haven't registered any watches yet.
+ EXPECT_THAT(read(fd.get(), buf.data(), buf.size()),
+ SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST(Inotify, AddWatchOnInvalidFdFails) {
+ // Garbage fd.
+ EXPECT_THAT(inotify_add_watch(-1, "/tmp", IN_ALL_EVENTS),
+ SyscallFailsWithErrno(EBADF));
+ EXPECT_THAT(inotify_add_watch(1337, "/tmp", IN_ALL_EVENTS),
+ SyscallFailsWithErrno(EBADF));
+
+ // Non-inotify fds.
+ EXPECT_THAT(inotify_add_watch(0, "/tmp", IN_ALL_EVENTS),
+ SyscallFailsWithErrno(EINVAL));
+ EXPECT_THAT(inotify_add_watch(1, "/tmp", IN_ALL_EVENTS),
+ SyscallFailsWithErrno(EINVAL));
+ EXPECT_THAT(inotify_add_watch(2, "/tmp", IN_ALL_EVENTS),
+ SyscallFailsWithErrno(EINVAL));
+ const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open("/tmp", O_RDONLY));
+ EXPECT_THAT(inotify_add_watch(fd.get(), "/tmp", IN_ALL_EVENTS),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(Inotify, RemovingWatchGeneratesEvent) {
+ const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+ const int wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+ EXPECT_THAT(inotify_rm_watch(fd.get(), wd), SyscallSucceeds());
+
+ // Read events, ensure the first event is IN_IGNORED.
+ const std::vector<Event> events =
+ ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ EXPECT_THAT(events, Are({Event(IN_IGNORED, wd)}));
+}
+
+TEST(Inotify, CanDeleteFileAfterRemovingWatch) {
+ const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ TempPath file1 =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+ const int wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS));
+
+ EXPECT_THAT(inotify_rm_watch(fd.get(), wd), SyscallSucceeds());
+ file1.reset();
+}
+
+TEST(Inotify, CanRemoveWatchAfterDeletingFile) {
+ const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ TempPath file1 =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+ const int wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS));
+
+ file1.reset();
+ const std::vector<Event> events =
+ ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ EXPECT_THAT(events, Are({Event(IN_ATTRIB, wd), Event(IN_DELETE_SELF, wd),
+ Event(IN_IGNORED, wd)}));
+
+ EXPECT_THAT(inotify_rm_watch(fd.get(), wd), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(Inotify, DuplicateWatchRemovalFails) {
+ const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+ const int wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+
+ EXPECT_THAT(inotify_rm_watch(fd.get(), wd), SyscallSucceeds());
+ EXPECT_THAT(inotify_rm_watch(fd.get(), wd), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(Inotify, ConcurrentFileDeletionAndWatchRemoval) {
+ const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+ const std::string filename = NewTempAbsPathInDir(root.path());
+
+ auto file_create_delete = [filename]() {
+ const DisableSave ds; // Too expensive.
+ for (int i = 0; i < 100; ++i) {
+ FileDescriptor file_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(filename, O_CREAT, S_IRUSR | S_IWUSR));
+ file_fd.reset(); // Close before unlinking (although save is disabled).
+ EXPECT_THAT(unlink(filename.c_str()), SyscallSucceeds());
+ }
+ };
+
+ const int shared_fd = fd.get(); // We need to pass it to the thread.
+ auto add_remove_watch = [shared_fd, filename]() {
+ for (int i = 0; i < 100; ++i) {
+ int wd = inotify_add_watch(shared_fd, filename.c_str(), IN_ALL_EVENTS);
+ MaybeSave();
+ if (wd != -1) {
+ // Watch added successfully, try removal.
+ if (inotify_rm_watch(shared_fd, wd)) {
+ // If removal fails, the only acceptable reason is if the wd
+ // is invalid, which will be the case if we try to remove
+ // the watch after the file has been deleted.
+ EXPECT_EQ(errno, EINVAL);
+ }
+ } else {
+ // Add watch failed, this should only fail if the target file doesn't
+ // exist.
+ EXPECT_EQ(errno, ENOENT);
+ }
+ }
+ };
+
+ ScopedThread t1(file_create_delete);
+ ScopedThread t2(add_remove_watch);
+}
+
+TEST(Inotify, DeletingChildGeneratesEvents) {
+ const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ TempPath file1 =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+ const int root_wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+ const int file1_wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS));
+
+ const std::string file1_path = file1.reset();
+
+ const std::vector<Event> events =
+ ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ ASSERT_THAT(
+ events,
+ AreUnordered({Event(IN_ATTRIB, file1_wd), Event(IN_DELETE_SELF, file1_wd),
+ Event(IN_IGNORED, file1_wd),
+ Event(IN_DELETE, root_wd, Basename(file1_path))}));
+}
+
+TEST(Inotify, CreatingFileGeneratesEvents) {
+ const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+ const int wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+
+ // Create a new file in the directory.
+ const TempPath file1 =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+ const std::vector<Event> events =
+ ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+
+ // The library function we use to create the new file opens it for writing to
+ // create it and sets permissions on it, so we expect the three extra events.
+ ASSERT_THAT(events, Are({Event(IN_CREATE, wd, Basename(file1.path())),
+ Event(IN_OPEN, wd, Basename(file1.path())),
+ Event(IN_CLOSE_WRITE, wd, Basename(file1.path())),
+ Event(IN_ATTRIB, wd, Basename(file1.path()))}));
+}
+
+TEST(Inotify, ReadingFileGeneratesAccessEvent) {
+ const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+ const TempPath file1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ root.path(), "some content", TempPath::kDefaultFileMode));
+
+ const FileDescriptor file1_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDONLY));
+ const int wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+
+ char buf;
+ EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds());
+
+ const std::vector<Event> events =
+ ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ ASSERT_THAT(events, Are({Event(IN_ACCESS, wd, Basename(file1.path()))}));
+}
+
+TEST(Inotify, WritingFileGeneratesModifyEvent) {
+ const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+ const TempPath file1 =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+
+ const FileDescriptor file1_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_WRONLY));
+ const int wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+
+ const std::string data = "some content";
+ EXPECT_THAT(write(file1_fd.get(), data.c_str(), data.length()),
+ SyscallSucceeds());
+
+ const std::vector<Event> events =
+ ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ ASSERT_THAT(events, Are({Event(IN_MODIFY, wd, Basename(file1.path()))}));
+}
+
+TEST(Inotify, WatchSetAfterOpenReportsCloseFdEvent) {
+ const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+ const TempPath file1 =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+
+ FileDescriptor file1_fd_writable =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_WRONLY));
+ FileDescriptor file1_fd_not_writable =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDONLY));
+ const int wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+
+ file1_fd_writable.reset(); // Close file1_fd_writable.
+ std::vector<Event> events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ ASSERT_THAT(events, Are({Event(IN_CLOSE_WRITE, wd, Basename(file1.path()))}));
+
+ file1_fd_not_writable.reset(); // Close file1_fd_not_writable.
+ events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ ASSERT_THAT(events,
+ Are({Event(IN_CLOSE_NOWRITE, wd, Basename(file1.path()))}));
+}
+
+TEST(Inotify, ChildrenDeletionInWatchedDirGeneratesEvent) {
+ const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+ TempPath file1 =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+ TempPath dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(root.path()));
+
+ const int wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+
+ const std::string file1_path = file1.reset();
+ const std::string dir1_path = dir1.release();
+ EXPECT_THAT(rmdir(dir1_path.c_str()), SyscallSucceeds());
+
+ const std::vector<Event> events =
+ ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+
+ ASSERT_THAT(events,
+ Are({Event(IN_DELETE, wd, Basename(file1_path)),
+ Event(IN_DELETE | IN_ISDIR, wd, Basename(dir1_path))}));
+}
+
+TEST(Inotify, WatchTargetDeletionGeneratesEvent) {
+ const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+ const int wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+
+ EXPECT_THAT(rmdir(root.path().c_str()), SyscallSucceeds());
+ const std::vector<Event> events =
+ ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ ASSERT_THAT(events, Are({Event(IN_DELETE_SELF, wd), Event(IN_IGNORED, wd)}));
+}
+
+TEST(Inotify, MoveGeneratesEvents) {
+ const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+ TempPath file1 =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+
+ const TempPath dir1 =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(root.path()));
+ const TempPath dir2 =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(root.path()));
+
+ const int root_wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+ const int dir1_wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), dir1.path(), IN_ALL_EVENTS));
+ const int dir2_wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), dir2.path(), IN_ALL_EVENTS));
+ // Test move from root -> root.
+ std::string newpath = NewTempAbsPathInDir(root.path());
+ std::string oldpath = file1.release();
+ EXPECT_THAT(rename(oldpath.c_str(), newpath.c_str()), SyscallSucceeds());
+ file1.reset(newpath);
+ std::vector<Event> events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ ASSERT_THAT(
+ events,
+ Are({Event(IN_MOVED_FROM, root_wd, Basename(oldpath), events[0].cookie),
+ Event(IN_MOVED_TO, root_wd, Basename(newpath), events[1].cookie)}));
+ EXPECT_NE(events[0].cookie, 0);
+ EXPECT_EQ(events[0].cookie, events[1].cookie);
+ uint32_t last_cookie = events[0].cookie;
+
+ // Test move from root -> root/dir1.
+ newpath = NewTempAbsPathInDir(dir1.path());
+ oldpath = file1.release();
+ EXPECT_THAT(rename(oldpath.c_str(), newpath.c_str()), SyscallSucceeds());
+ file1.reset(newpath);
+ events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ ASSERT_THAT(
+ events,
+ Are({Event(IN_MOVED_FROM, root_wd, Basename(oldpath), events[0].cookie),
+ Event(IN_MOVED_TO, dir1_wd, Basename(newpath), events[1].cookie)}));
+ // Cookies should be distinct between distinct rename events.
+ EXPECT_NE(events[0].cookie, last_cookie);
+ EXPECT_EQ(events[0].cookie, events[1].cookie);
+ last_cookie = events[0].cookie;
+
+ // Test move from root/dir1 -> root/dir2.
+ newpath = NewTempAbsPathInDir(dir2.path());
+ oldpath = file1.release();
+ EXPECT_THAT(rename(oldpath.c_str(), newpath.c_str()), SyscallSucceeds());
+ file1.reset(newpath);
+ events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ ASSERT_THAT(
+ events,
+ Are({Event(IN_MOVED_FROM, dir1_wd, Basename(oldpath), events[0].cookie),
+ Event(IN_MOVED_TO, dir2_wd, Basename(newpath), events[1].cookie)}));
+ EXPECT_NE(events[0].cookie, last_cookie);
+ EXPECT_EQ(events[0].cookie, events[1].cookie);
+ last_cookie = events[0].cookie;
+}
+
+TEST(Inotify, MoveWatchedTargetGeneratesEvents) {
+ const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+ TempPath file1 =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+
+ const int root_wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+ const int file1_wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS));
+
+ const std::string newpath = NewTempAbsPathInDir(root.path());
+ const std::string oldpath = file1.release();
+ EXPECT_THAT(rename(oldpath.c_str(), newpath.c_str()), SyscallSucceeds());
+ file1.reset(newpath);
+ const std::vector<Event> events =
+ ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ ASSERT_THAT(
+ events,
+ Are({Event(IN_MOVED_FROM, root_wd, Basename(oldpath), events[0].cookie),
+ Event(IN_MOVED_TO, root_wd, Basename(newpath), events[1].cookie),
+ // Self move events do not have a cookie.
+ Event(IN_MOVE_SELF, file1_wd)}));
+ EXPECT_NE(events[0].cookie, 0);
+ EXPECT_EQ(events[0].cookie, events[1].cookie);
+}
+
+TEST(Inotify, CoalesceEvents) {
+ const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+ const TempPath file1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ root.path(), "some content", TempPath::kDefaultFileMode));
+
+ FileDescriptor file1_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDONLY));
+ const int wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+
+ // Read the file a few times. This will would generate multiple IN_ACCESS
+ // events but they should get coalesced to a single event.
+ char buf;
+ EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds());
+ EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds());
+ EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds());
+ EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds());
+
+ // Use the close event verify that we haven't simply left the additional
+ // IN_ACCESS events unread.
+ file1_fd.reset(); // Close file1_fd.
+
+ const std::string file1_name = std::string(Basename(file1.path()));
+ std::vector<Event> events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ ASSERT_THAT(events, Are({Event(IN_ACCESS, wd, file1_name),
+ Event(IN_CLOSE_NOWRITE, wd, file1_name)}));
+
+ // Now let's try interleaving other events into a stream of repeated events.
+ file1_fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDWR));
+
+ EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds());
+ EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds());
+ EXPECT_THAT(write(file1_fd.get(), "x", 1), SyscallSucceeds());
+ EXPECT_THAT(write(file1_fd.get(), "x", 1), SyscallSucceeds());
+ EXPECT_THAT(write(file1_fd.get(), "x", 1), SyscallSucceeds());
+ EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds());
+ EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds());
+
+ file1_fd.reset(); // Close the file.
+
+ events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ ASSERT_THAT(
+ events,
+ Are({Event(IN_OPEN, wd, file1_name), Event(IN_ACCESS, wd, file1_name),
+ Event(IN_MODIFY, wd, file1_name), Event(IN_ACCESS, wd, file1_name),
+ Event(IN_CLOSE_WRITE, wd, file1_name)}));
+
+ // Ensure events aren't coalesced if they are from different files.
+ const TempPath file2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ root.path(), "some content", TempPath::kDefaultFileMode));
+ // Discard events resulting from creation of file2.
+ ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+
+ file1_fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDONLY));
+ FileDescriptor file2_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file2.path(), O_RDONLY));
+
+ EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds());
+ EXPECT_THAT(read(file2_fd.get(), &buf, 1), SyscallSucceeds());
+ EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds());
+ EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds());
+
+ // Close both files.
+ file1_fd.reset();
+ file2_fd.reset();
+
+ events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ const std::string file2_name = std::string(Basename(file2.path()));
+ ASSERT_THAT(
+ events,
+ Are({Event(IN_OPEN, wd, file1_name), Event(IN_OPEN, wd, file2_name),
+ Event(IN_ACCESS, wd, file1_name), Event(IN_ACCESS, wd, file2_name),
+ Event(IN_ACCESS, wd, file1_name),
+ Event(IN_CLOSE_NOWRITE, wd, file1_name),
+ Event(IN_CLOSE_NOWRITE, wd, file2_name)}));
+}
+
+TEST(Inotify, ClosingInotifyFdWithoutRemovingWatchesWorks) {
+ const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+ const TempPath file1 =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+ const FileDescriptor file1_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDONLY));
+
+ ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS));
+ // Note: The check on close will happen in FileDescriptor::~FileDescriptor().
+}
+
+TEST(Inotify, NestedWatches) {
+ const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+ const TempPath file1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ root.path(), "some content", TempPath::kDefaultFileMode));
+ const FileDescriptor file1_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDONLY));
+
+ const int root_wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+ const int file1_wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS));
+
+ // Read from file1. This should generate an event for both watches.
+ char buf;
+ EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds());
+
+ const std::vector<Event> events =
+ ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ ASSERT_THAT(events, Are({Event(IN_ACCESS, root_wd, Basename(file1.path())),
+ Event(IN_ACCESS, file1_wd)}));
+}
+
+TEST(Inotify, ConcurrentThreadsGeneratingEvents) {
+ const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+ std::vector<TempPath> files;
+ files.reserve(10);
+ for (int i = 0; i < 10; i++) {
+ files.emplace_back(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ root.path(), "some content", TempPath::kDefaultFileMode)));
+ }
+
+ auto test_thread = [&files]() {
+ uint32_t seed = time(nullptr);
+ for (int i = 0; i < 20; i++) {
+ const TempPath& file = files[rand_r(&seed) % files.size()];
+ const FileDescriptor file_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_WRONLY));
+ TEST_PCHECK(write(file_fd.get(), "x", 1) == 1);
+ }
+ };
+
+ ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+
+ std::list<ScopedThread> threads;
+ for (int i = 0; i < 3; i++) {
+ threads.emplace_back(test_thread);
+ }
+ for (auto& t : threads) {
+ t.Join();
+ }
+
+ const std::vector<Event> events =
+ ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ // 3 threads doing 20 iterations, 3 events per iteration (open, write,
+ // close). However, some events may be coalesced, and we can't reliably
+ // predict how they'll be coalesced since the test threads aren't
+ // synchronized. We can only check that we aren't getting unexpected events.
+ for (const Event& ev : events) {
+ EXPECT_NE(ev.mask & (IN_OPEN | IN_MODIFY | IN_CLOSE_WRITE), 0);
+ }
+}
+
+TEST(Inotify, ReadWithTooSmallBufferFails) {
+ const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const TempPath file1 =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+ const int wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS));
+
+ // Open the file to queue an event. This event will not have a filename, so
+ // reading from the inotify fd should return sizeof(struct inotify_event)
+ // bytes of data.
+ FileDescriptor file1_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDONLY));
+ std::vector<char> buf(kBufSize, 0);
+ ssize_t readlen;
+
+ // Try a buffer too small to hold any potential event. This is rejected
+ // outright without the event being dequeued.
+ EXPECT_THAT(read(fd.get(), buf.data(), sizeof(struct inotify_event) - 1),
+ SyscallFailsWithErrno(EINVAL));
+ // Try a buffer just large enough. This should succeeed.
+ EXPECT_THAT(
+ readlen = read(fd.get(), buf.data(), sizeof(struct inotify_event)),
+ SyscallSucceeds());
+ EXPECT_EQ(readlen, sizeof(struct inotify_event));
+ // Event queue is now empty, the next read should return EAGAIN.
+ EXPECT_THAT(read(fd.get(), buf.data(), sizeof(struct inotify_event)),
+ SyscallFailsWithErrno(EAGAIN));
+
+ // Now put a watch on the directory, so that generated events contain a name.
+ EXPECT_THAT(inotify_rm_watch(fd.get(), wd), SyscallSucceeds());
+
+ // Drain the event generated from the watch removal.
+ ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+
+ ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+
+ file1_fd.reset(); // Close file to generate an event.
+
+ // Try a buffer too small to hold any event and one too small to hold an event
+ // with a name. These should both fail without consuming the event.
+ EXPECT_THAT(read(fd.get(), buf.data(), sizeof(struct inotify_event) - 1),
+ SyscallFailsWithErrno(EINVAL));
+ EXPECT_THAT(read(fd.get(), buf.data(), sizeof(struct inotify_event)),
+ SyscallFailsWithErrno(EINVAL));
+ // Now try with a large enough buffer. This should return the one event.
+ EXPECT_THAT(readlen = read(fd.get(), buf.data(), buf.size()),
+ SyscallSucceeds());
+ EXPECT_GE(readlen,
+ sizeof(struct inotify_event) + Basename(file1.path()).size());
+ // With the single event read, the queue should once again be empty.
+ EXPECT_THAT(read(fd.get(), buf.data(), sizeof(struct inotify_event)),
+ SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST(Inotify, BlockingReadOnInotifyFd) {
+ const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(0));
+ const TempPath file1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ root.path(), "some content", TempPath::kDefaultFileMode));
+
+ const FileDescriptor file1_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDONLY));
+
+ ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+
+ // Spawn a thread performing a blocking read for new events on the inotify fd.
+ std::vector<char> buf(kBufSize, 0);
+ const int shared_fd = fd.get(); // The thread needs it.
+ ScopedThread t([shared_fd, &buf]() {
+ ssize_t readlen;
+ EXPECT_THAT(readlen = read(shared_fd, buf.data(), buf.size()),
+ SyscallSucceeds());
+ });
+
+ // Perform a read on the watched file, which should generate an IN_ACCESS
+ // event, unblocking the event_reader thread.
+ char c;
+ EXPECT_THAT(read(file1_fd.get(), &c, 1), SyscallSucceeds());
+
+ // Wait for the thread to read the event and exit.
+ t.Join();
+
+ // Make sure the event we got back is sane.
+ uint32_t event_mask;
+ memcpy(&event_mask, buf.data() + offsetof(struct inotify_event, mask),
+ sizeof(event_mask));
+ EXPECT_EQ(event_mask, IN_ACCESS);
+}
+
+TEST(Inotify, WatchOnRelativePath) {
+ const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+ const TempPath file1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ root.path(), "some content", TempPath::kDefaultFileMode));
+
+ const FileDescriptor file1_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDONLY));
+
+ // Change working directory to root.
+ const char* old_working_dir = get_current_dir_name();
+ EXPECT_THAT(chdir(root.path().c_str()), SyscallSucceeds());
+
+ // Add a watch on file1 with a relative path.
+ const int wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), std::string(Basename(file1.path())), IN_ALL_EVENTS));
+
+ // Perform a read on file1, this should generate an IN_ACCESS event.
+ char c;
+ EXPECT_THAT(read(file1_fd.get(), &c, 1), SyscallSucceeds());
+
+ const std::vector<Event> events =
+ ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ EXPECT_THAT(events, Are({Event(IN_ACCESS, wd)}));
+
+ // Explicitly reset the working directory so that we don't continue to
+ // reference "root". Once the test ends, "root" will get unlinked. If we
+ // continue to hold a reference, random save/restore tests can fail if a save
+ // is triggered after "root" is unlinked; we can't save deleted fs objects
+ // with active references.
+ EXPECT_THAT(chdir(old_working_dir), SyscallSucceeds());
+}
+
+TEST(Inotify, ZeroLengthReadWriteDoesNotGenerateEvent) {
+ const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+ const char kContent[] = "some content";
+ TempPath file1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ root.path(), kContent, TempPath::kDefaultFileMode));
+ const int kContentSize = sizeof(kContent) - 1;
+
+ const FileDescriptor file1_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDWR));
+
+ const int wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+
+ std::vector<char> buf(kContentSize, 0);
+ // Read all available data.
+ ssize_t readlen;
+ EXPECT_THAT(readlen = read(file1_fd.get(), buf.data(), kContentSize),
+ SyscallSucceeds());
+ EXPECT_EQ(readlen, kContentSize);
+ // Drain all events and make sure we got the IN_ACCESS for the read.
+ std::vector<Event> events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ EXPECT_THAT(events, Are({Event(IN_ACCESS, wd, Basename(file1.path()))}));
+
+ // Now try read again. This should be a 0-length read, since we're at EOF.
+ char c;
+ EXPECT_THAT(readlen = read(file1_fd.get(), &c, 1), SyscallSucceeds());
+ EXPECT_EQ(readlen, 0);
+ // We should have no new events.
+ events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ EXPECT_TRUE(events.empty());
+
+ // Try issuing a zero-length read.
+ EXPECT_THAT(readlen = read(file1_fd.get(), &c, 0), SyscallSucceeds());
+ EXPECT_EQ(readlen, 0);
+ // We should have no new events.
+ events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ EXPECT_TRUE(events.empty());
+
+ // Try issuing a zero-length write.
+ ssize_t writelen;
+ EXPECT_THAT(writelen = write(file1_fd.get(), &c, 0), SyscallSucceeds());
+ EXPECT_EQ(writelen, 0);
+ // We should have no new events.
+ events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ EXPECT_TRUE(events.empty());
+}
+
+TEST(Inotify, ChmodGeneratesAttribEvent_NoRandomSave) {
+ const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const TempPath file1 =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+
+ const FileDescriptor root_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(root.path(), O_RDONLY));
+ const FileDescriptor file1_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDWR));
+ FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+ const int root_wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+ const int file1_wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS));
+
+ auto verify_chmod_events = [&]() {
+ std::vector<Event> events =
+ ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ ASSERT_THAT(events, Are({Event(IN_ATTRIB, root_wd, Basename(file1.path())),
+ Event(IN_ATTRIB, file1_wd)}));
+ };
+
+ // Don't do cooperative S/R tests for any of the {f}chmod* syscalls below, the
+ // test will always fail because nodes cannot be saved when they have stricted
+ // permissions than the original host node.
+ const DisableSave ds;
+
+ // Chmod.
+ ASSERT_THAT(chmod(file1.path().c_str(), S_IWGRP), SyscallSucceeds());
+ verify_chmod_events();
+
+ // Fchmod.
+ ASSERT_THAT(fchmod(file1_fd.get(), S_IRGRP | S_IWGRP), SyscallSucceeds());
+ verify_chmod_events();
+
+ // Fchmodat.
+ const std::string file1_basename = std::string(Basename(file1.path()));
+ ASSERT_THAT(fchmodat(root_fd.get(), file1_basename.c_str(), S_IWGRP, 0),
+ SyscallSucceeds());
+ verify_chmod_events();
+}
+
+TEST(Inotify, TruncateGeneratesModifyEvent) {
+ const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const TempPath file1 =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+ const FileDescriptor file1_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDWR));
+
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+ const int root_wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+ const int file1_wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS));
+
+ auto verify_truncate_events = [&]() {
+ std::vector<Event> events =
+ ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ ASSERT_THAT(events, Are({Event(IN_MODIFY, root_wd, Basename(file1.path())),
+ Event(IN_MODIFY, file1_wd)}));
+ };
+
+ // Truncate.
+ EXPECT_THAT(truncate(file1.path().c_str(), 4096), SyscallSucceeds());
+ verify_truncate_events();
+
+ // Ftruncate.
+ EXPECT_THAT(ftruncate(file1_fd.get(), 8192), SyscallSucceeds());
+ verify_truncate_events();
+
+ // No events if truncate fails.
+ EXPECT_THAT(ftruncate(file1_fd.get(), -1), SyscallFailsWithErrno(EINVAL));
+ const std::vector<Event> events =
+ ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ ASSERT_THAT(events, Are({}));
+}
+
+TEST(Inotify, GetdentsGeneratesAccessEvent) {
+ const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const TempPath file1 =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+ ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+ ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS));
+
+ // This internally calls getdents(2). We also expect to see an open/close
+ // event for the dirfd.
+ ASSERT_NO_ERRNO_AND_VALUE(ListDir(root.path(), false));
+ const std::vector<Event> events =
+ ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+
+ // Linux only seems to generate access events on getdents() on some
+ // calls. Allow the test to pass even if it isn't generated. gVisor will
+ // always generate the IN_ACCESS event so the test will at least ensure gVisor
+ // behaves reasonably.
+ int i = 0;
+ EXPECT_EQ(events[i].mask, IN_OPEN | IN_ISDIR);
+ ++i;
+ if (IsRunningOnGvisor()) {
+ EXPECT_EQ(events[i].mask, IN_ACCESS | IN_ISDIR);
+ ++i;
+ } else {
+ if (events[i].mask == (IN_ACCESS | IN_ISDIR)) {
+ // Skip over the IN_ACCESS event on Linux, it only shows up some of the
+ // time so we can't assert its existence.
+ ++i;
+ }
+ }
+ EXPECT_EQ(events[i].mask, IN_CLOSE_NOWRITE | IN_ISDIR);
+}
+
+TEST(Inotify, MknodGeneratesCreateEvent) {
+ const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+ const int wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+
+ const TempPath file1(root.path() + "/file1");
+ const int rc = mknod(file1.path().c_str(), S_IFREG, 0);
+ // mknod(2) is only supported on tmpfs in the sandbox.
+ SKIP_IF(IsRunningOnGvisor() && rc != 0);
+ ASSERT_THAT(rc, SyscallSucceeds());
+
+ const std::vector<Event> events =
+ ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ ASSERT_THAT(events, Are({Event(IN_CREATE, wd, Basename(file1.path()))}));
+}
+
+TEST(Inotify, SymlinkGeneratesCreateEvent) {
+ const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const TempPath file1 =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+ const TempPath link1(NewTempAbsPathInDir(root.path()));
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+ const int root_wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+ ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS));
+
+ ASSERT_THAT(symlink(file1.path().c_str(), link1.path().c_str()),
+ SyscallSucceeds());
+
+ const std::vector<Event> events =
+ ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+
+ ASSERT_THAT(events, Are({Event(IN_CREATE, root_wd, Basename(link1.path()))}));
+}
+
+TEST(Inotify, LinkGeneratesAttribAndCreateEvents) {
+ const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const TempPath file1 =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+ const TempPath link1(root.path() + "/link1");
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+ const int root_wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+ const int file1_wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS));
+
+ const int rc = link(file1.path().c_str(), link1.path().c_str());
+ // link(2) is only supported on tmpfs in the sandbox.
+ SKIP_IF(IsRunningOnGvisor() && rc != 0 && errno == EPERM);
+ ASSERT_THAT(rc, SyscallSucceeds());
+
+ const std::vector<Event> events =
+ ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ ASSERT_THAT(events, Are({Event(IN_ATTRIB, file1_wd),
+ Event(IN_CREATE, root_wd, Basename(link1.path()))}));
+}
+
+TEST(Inotify, HardlinksReuseSameWatch) {
+ const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ TempPath file1 =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+ TempPath link1(root.path() + "/link1");
+ const int rc = link(file1.path().c_str(), link1.path().c_str());
+ // link(2) is only supported on tmpfs in the sandbox.
+ SKIP_IF(IsRunningOnGvisor() && rc != 0 && errno == EPERM);
+ ASSERT_THAT(rc, SyscallSucceeds());
+
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+ const int root_wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+ const int file1_wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS));
+ const int link1_wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), link1.path(), IN_ALL_EVENTS));
+
+ // The watch descriptors for watches on different links to the same file
+ // should be identical.
+ EXPECT_NE(root_wd, file1_wd);
+ EXPECT_EQ(file1_wd, link1_wd);
+
+ FileDescriptor file1_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_WRONLY));
+
+ std::vector<Event> events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ ASSERT_THAT(events,
+ AreUnordered({Event(IN_OPEN, root_wd, Basename(file1.path())),
+ Event(IN_OPEN, file1_wd)}));
+
+ // For the next step, we want to ensure all fds to the file are closed. Do
+ // that now and drain the resulting events.
+ file1_fd.reset();
+ events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ ASSERT_THAT(events,
+ Are({Event(IN_CLOSE_WRITE, root_wd, Basename(file1.path())),
+ Event(IN_CLOSE_WRITE, file1_wd)}));
+
+ // Try removing the link and let's see what events show up. Note that after
+ // this, we still have a link to the file so the watch shouldn't be
+ // automatically removed.
+ const std::string link1_path = link1.reset();
+
+ events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ ASSERT_THAT(events, Are({Event(IN_ATTRIB, link1_wd),
+ Event(IN_DELETE, root_wd, Basename(link1_path))}));
+
+ // Now remove the other link. Since this is the last link to the file, the
+ // watch should be automatically removed.
+ const std::string file1_path = file1.reset();
+
+ events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ ASSERT_THAT(
+ events,
+ AreUnordered({Event(IN_ATTRIB, file1_wd), Event(IN_DELETE_SELF, file1_wd),
+ Event(IN_IGNORED, file1_wd),
+ Event(IN_DELETE, root_wd, Basename(file1_path))}));
+}
+
+TEST(Inotify, MkdirGeneratesCreateEventWithDirFlag) {
+ const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+ const int root_wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+
+ const TempPath dir1(NewTempAbsPathInDir(root.path()));
+ ASSERT_THAT(mkdir(dir1.path().c_str(), 0777), SyscallSucceeds());
+
+ const std::vector<Event> events =
+ ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ ASSERT_THAT(
+ events,
+ Are({Event(IN_CREATE | IN_ISDIR, root_wd, Basename(dir1.path()))}));
+}
+
+TEST(Inotify, MultipleInotifyInstancesAndWatchesAllGetEvents) {
+ const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const TempPath file1 =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+
+ const FileDescriptor file1_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_WRONLY));
+ constexpr int kNumFds = 30;
+ std::vector<FileDescriptor> inotify_fds;
+
+ for (int i = 0; i < kNumFds; ++i) {
+ const DisableSave ds; // Too expensive.
+ inotify_fds.emplace_back(
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)));
+ const FileDescriptor& fd = inotify_fds[inotify_fds.size() - 1]; // Back.
+ ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+ ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS));
+ }
+
+ const std::string data = "some content";
+ EXPECT_THAT(write(file1_fd.get(), data.c_str(), data.length()),
+ SyscallSucceeds());
+
+ for (const FileDescriptor& fd : inotify_fds) {
+ const DisableSave ds; // Too expensive.
+ const std::vector<Event> events =
+ ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ if (events.size() >= 2) {
+ EXPECT_EQ(events[0].mask, IN_MODIFY);
+ EXPECT_EQ(events[0].wd, 1);
+ EXPECT_EQ(events[0].name, Basename(file1.path()));
+ EXPECT_EQ(events[1].mask, IN_MODIFY);
+ EXPECT_EQ(events[1].wd, 2);
+ EXPECT_EQ(events[1].name, "");
+ }
+ }
+}
+
+TEST(Inotify, EventsGoUpAtMostOneLevel) {
+ const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const TempPath dir1 =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(root.path()));
+ TempPath file1 =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir1.path()));
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+ ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS));
+ const int dir1_wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), dir1.path(), IN_ALL_EVENTS));
+
+ const std::string file1_path = file1.reset();
+
+ const std::vector<Event> events =
+ ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ ASSERT_THAT(events, Are({Event(IN_DELETE, dir1_wd, Basename(file1_path))}));
+}
+
+TEST(Inotify, DuplicateWatchReturnsSameWatchDescriptor) {
+ const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const TempPath file1 =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+ const int wd1 = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS));
+ const int wd2 = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS));
+
+ EXPECT_EQ(wd1, wd2);
+
+ const FileDescriptor file1_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_WRONLY));
+ const std::vector<Event> events =
+ ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ // The watch shouldn't be duplicated, we only expect one event.
+ ASSERT_THAT(events, Are({Event(IN_OPEN, wd1)}));
+}
+
+TEST(Inotify, UnmatchedEventsAreDiscarded) {
+ const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const TempPath file1 =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch(fd.get(), file1.path(), IN_ACCESS));
+
+ const FileDescriptor file1_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_WRONLY));
+
+ const std::vector<Event> events =
+ ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ // We only asked for access events, the open event should be discarded.
+ ASSERT_THAT(events, Are({}));
+}
+
+TEST(Inotify, AddWatchWithInvalidEventMaskFails) {
+ const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+ EXPECT_THAT(inotify_add_watch(fd.get(), root.path().c_str(), 0),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(Inotify, AddWatchOnInvalidPathFails) {
+ const TempPath nonexistent(NewTempAbsPath());
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+ // Non-existent path.
+ EXPECT_THAT(
+ inotify_add_watch(fd.get(), nonexistent.path().c_str(), IN_CREATE),
+ SyscallFailsWithErrno(ENOENT));
+
+ // Garbage path pointer.
+ EXPECT_THAT(inotify_add_watch(fd.get(), nullptr, IN_CREATE),
+ SyscallFailsWithErrno(EFAULT));
+}
+
+TEST(Inotify, InOnlyDirFlagRespected) {
+ const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const TempPath file1 =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+ EXPECT_THAT(
+ inotify_add_watch(fd.get(), root.path().c_str(), IN_ACCESS | IN_ONLYDIR),
+ SyscallSucceeds());
+
+ EXPECT_THAT(
+ inotify_add_watch(fd.get(), file1.path().c_str(), IN_ACCESS | IN_ONLYDIR),
+ SyscallFailsWithErrno(ENOTDIR));
+}
+
+TEST(Inotify, MaskAddMergesWithExistingEventMask) {
+ const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const TempPath file1 =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+ FileDescriptor file1_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_WRONLY));
+
+ const int wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), file1.path(), IN_OPEN | IN_CLOSE_WRITE));
+
+ const std::string data = "some content";
+ EXPECT_THAT(write(file1_fd.get(), data.c_str(), data.length()),
+ SyscallSucceeds());
+
+ // We shouldn't get any events, since IN_MODIFY wasn't in the event mask.
+ std::vector<Event> events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ ASSERT_THAT(events, Are({}));
+
+ // Add IN_MODIFY to event mask.
+ ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), file1.path(), IN_MODIFY | IN_MASK_ADD));
+
+ EXPECT_THAT(write(file1_fd.get(), data.c_str(), data.length()),
+ SyscallSucceeds());
+
+ // This time we should get the modify event.
+ events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ ASSERT_THAT(events, Are({Event(IN_MODIFY, wd)}));
+
+ // Now close the fd. If the modify event was added to the event mask rather
+ // than replacing the event mask we won't get the close event.
+ file1_fd.reset();
+ events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ ASSERT_THAT(events, Are({Event(IN_CLOSE_WRITE, wd)}));
+}
+
+} // namespace
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/ioctl.cc b/test/syscalls/linux/ioctl.cc
new file mode 100644
index 000000000..bee0ba1b3
--- /dev/null
+++ b/test/syscalls/linux/ioctl.cc
@@ -0,0 +1,375 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <net/if.h>
+#include <netdb.h>
+#include <signal.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+bool CheckNonBlocking(int fd) {
+ int ret = fcntl(fd, F_GETFL, 0);
+ TEST_CHECK(ret != -1);
+ return (ret & O_NONBLOCK) == O_NONBLOCK;
+}
+
+bool CheckCloExec(int fd) {
+ int ret = fcntl(fd, F_GETFD, 0);
+ TEST_CHECK(ret != -1);
+ return (ret & FD_CLOEXEC) == FD_CLOEXEC;
+}
+
+class IoctlTest : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ ASSERT_THAT(fd_ = open("/dev/null", O_RDONLY), SyscallSucceeds());
+ }
+
+ void TearDown() override {
+ if (fd_ >= 0) {
+ ASSERT_THAT(close(fd_), SyscallSucceeds());
+ fd_ = -1;
+ }
+ }
+
+ int fd() const { return fd_; }
+
+ private:
+ int fd_ = -1;
+};
+
+TEST_F(IoctlTest, BadFileDescriptor) {
+ EXPECT_THAT(ioctl(-1 /* fd */, 0), SyscallFailsWithErrno(EBADF));
+}
+
+TEST_F(IoctlTest, InvalidControlNumber) {
+ EXPECT_THAT(ioctl(STDOUT_FILENO, 0), SyscallFailsWithErrno(ENOTTY));
+}
+
+TEST_F(IoctlTest, FIONBIOSucceeds) {
+ EXPECT_FALSE(CheckNonBlocking(fd()));
+ int set = 1;
+ EXPECT_THAT(ioctl(fd(), FIONBIO, &set), SyscallSucceeds());
+ EXPECT_TRUE(CheckNonBlocking(fd()));
+ set = 0;
+ EXPECT_THAT(ioctl(fd(), FIONBIO, &set), SyscallSucceeds());
+ EXPECT_FALSE(CheckNonBlocking(fd()));
+}
+
+TEST_F(IoctlTest, FIONBIOFails) {
+ EXPECT_THAT(ioctl(fd(), FIONBIO, nullptr), SyscallFailsWithErrno(EFAULT));
+}
+
+TEST_F(IoctlTest, FIONCLEXSucceeds) {
+ EXPECT_THAT(ioctl(fd(), FIONCLEX), SyscallSucceeds());
+ EXPECT_FALSE(CheckCloExec(fd()));
+}
+
+TEST_F(IoctlTest, FIOCLEXSucceeds) {
+ EXPECT_THAT(ioctl(fd(), FIOCLEX), SyscallSucceeds());
+ EXPECT_TRUE(CheckCloExec(fd()));
+}
+
+TEST_F(IoctlTest, FIOASYNCFails) {
+ EXPECT_THAT(ioctl(fd(), FIOASYNC, nullptr), SyscallFailsWithErrno(EFAULT));
+}
+
+TEST_F(IoctlTest, FIOASYNCSucceeds) {
+ // Not all FDs support FIOASYNC.
+ const FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
+ Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0));
+
+ int before = -1;
+ ASSERT_THAT(before = fcntl(s.get(), F_GETFL), SyscallSucceeds());
+
+ int set = 1;
+ EXPECT_THAT(ioctl(s.get(), FIOASYNC, &set), SyscallSucceeds());
+
+ int after_set = -1;
+ ASSERT_THAT(after_set = fcntl(s.get(), F_GETFL), SyscallSucceeds());
+ EXPECT_EQ(after_set, before | O_ASYNC) << "before was " << before;
+
+ set = 0;
+ EXPECT_THAT(ioctl(s.get(), FIOASYNC, &set), SyscallSucceeds());
+
+ ASSERT_THAT(fcntl(s.get(), F_GETFL), SyscallSucceedsWithValue(before));
+}
+
+/* Count of the number of SIGIOs handled. */
+static volatile int io_received = 0;
+
+void inc_io_handler(int sig, siginfo_t* siginfo, void* arg) { io_received++; }
+
+TEST_F(IoctlTest, FIOASYNCNoTarget) {
+ auto pair =
+ ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+ // Count SIGIOs received.
+ io_received = 0;
+ struct sigaction sa;
+ sa.sa_sigaction = inc_io_handler;
+ sigfillset(&sa.sa_mask);
+ sa.sa_flags = SA_RESTART;
+ auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGIO, sa));
+
+ // Actually allow SIGIO delivery.
+ auto mask_cleanup =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGIO));
+
+ int set = 1;
+ EXPECT_THAT(ioctl(pair->second_fd(), FIOASYNC, &set), SyscallSucceeds());
+
+ constexpr char kData[] = "abc";
+ ASSERT_THAT(WriteFd(pair->first_fd(), kData, sizeof(kData)),
+ SyscallSucceedsWithValue(sizeof(kData)));
+
+ EXPECT_EQ(io_received, 0);
+}
+
+TEST_F(IoctlTest, FIOASYNCSelfTarget) {
+ // FIXME: gVisor erroneously sends SIGIO on close(2), which would
+ // kill the test when pair goes out of scope. Temporarily ignore SIGIO so that
+ // that the close signal is ignored.
+ struct sigaction sa;
+ sa.sa_handler = SIG_IGN;
+ auto early_sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGIO, sa));
+
+ auto pair =
+ ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+ // Count SIGIOs received.
+ io_received = 0;
+ sa.sa_sigaction = inc_io_handler;
+ sigfillset(&sa.sa_mask);
+ sa.sa_flags = SA_RESTART;
+ auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGIO, sa));
+
+ // Actually allow SIGIO delivery.
+ auto mask_cleanup =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGIO));
+
+ int set = 1;
+ EXPECT_THAT(ioctl(pair->second_fd(), FIOASYNC, &set), SyscallSucceeds());
+
+ pid_t pid = getpid();
+ EXPECT_THAT(ioctl(pair->second_fd(), FIOSETOWN, &pid), SyscallSucceeds());
+
+ constexpr char kData[] = "abc";
+ ASSERT_THAT(WriteFd(pair->first_fd(), kData, sizeof(kData)),
+ SyscallSucceedsWithValue(sizeof(kData)));
+
+ EXPECT_EQ(io_received, 1);
+}
+
+// Equivalent to FIOASYNCSelfTarget except that FIOSETOWN is called before
+// FIOASYNC.
+TEST_F(IoctlTest, FIOASYNCSelfTarget2) {
+ // FIXME: gVisor erroneously sends SIGIO on close(2), which would
+ // kill the test when pair goes out of scope. Temporarily ignore SIGIO so that
+ // that the close signal is ignored.
+ struct sigaction sa;
+ sa.sa_handler = SIG_IGN;
+ auto early_sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGIO, sa));
+
+ auto pair =
+ ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+ // Count SIGIOs received.
+ io_received = 0;
+ sa.sa_sigaction = inc_io_handler;
+ sigfillset(&sa.sa_mask);
+ sa.sa_flags = SA_RESTART;
+ auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGIO, sa));
+
+ // Actually allow SIGIO delivery.
+ auto mask_cleanup =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGIO));
+
+ pid_t pid = getpid();
+ EXPECT_THAT(ioctl(pair->second_fd(), FIOSETOWN, &pid), SyscallSucceeds());
+
+ int set = 1;
+ EXPECT_THAT(ioctl(pair->second_fd(), FIOASYNC, &set), SyscallSucceeds());
+
+ constexpr char kData[] = "abc";
+ ASSERT_THAT(WriteFd(pair->first_fd(), kData, sizeof(kData)),
+ SyscallSucceedsWithValue(sizeof(kData)));
+
+ EXPECT_EQ(io_received, 1);
+}
+
+TEST_F(IoctlTest, FIOASYNCInvalidPID) {
+ auto pair =
+ ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+ int set = 1;
+ ASSERT_THAT(ioctl(pair->second_fd(), FIOASYNC, &set), SyscallSucceeds());
+ pid_t pid = INT_MAX;
+ // This succeeds (with behavior equivalent to a pid of 0) in Linux prior to
+ // f73127356f34 "fs/fcntl: return -ESRCH in f_setown when pid/pgid can't be
+ // found", and fails with EPERM after that commit.
+ EXPECT_THAT(ioctl(pair->second_fd(), FIOSETOWN, &pid),
+ AnyOf(SyscallSucceeds(), SyscallFailsWithErrno(ESRCH)));
+}
+
+TEST_F(IoctlTest, FIOASYNCUnsetTarget) {
+ auto pair =
+ ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+ // Count SIGIOs received.
+ io_received = 0;
+ struct sigaction sa;
+ sa.sa_sigaction = inc_io_handler;
+ sigfillset(&sa.sa_mask);
+ sa.sa_flags = SA_RESTART;
+ auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGIO, sa));
+
+ // Actually allow SIGIO delivery.
+ auto mask_cleanup =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGIO));
+
+ int set = 1;
+ EXPECT_THAT(ioctl(pair->second_fd(), FIOASYNC, &set), SyscallSucceeds());
+
+ pid_t pid = getpid();
+ EXPECT_THAT(ioctl(pair->second_fd(), FIOSETOWN, &pid), SyscallSucceeds());
+
+ // Passing a PID of 0 unsets the target.
+ pid = 0;
+ EXPECT_THAT(ioctl(pair->second_fd(), FIOSETOWN, &pid), SyscallSucceeds());
+
+ constexpr char kData[] = "abc";
+ ASSERT_THAT(WriteFd(pair->first_fd(), kData, sizeof(kData)),
+ SyscallSucceedsWithValue(sizeof(kData)));
+
+ EXPECT_EQ(io_received, 0);
+}
+
+using IoctlTestSIOCGIFCONF = SimpleSocketTest;
+
+TEST_P(IoctlTestSIOCGIFCONF, ValidateNoArrayGetsLength) {
+ auto fd = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+ // Validate that no array can be used to get the length required.
+ struct ifconf ifconf = {};
+ ASSERT_THAT(ioctl(fd->get(), SIOCGIFCONF, &ifconf), SyscallSucceeds());
+ ASSERT_GT(ifconf.ifc_len, 0);
+}
+
+// This test validates that we will only return a partial array list and not
+// partial ifrreq structs.
+TEST_P(IoctlTestSIOCGIFCONF, ValidateNoPartialIfrsReturned) {
+ auto fd = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+ struct ifreq ifr = {};
+ struct ifconf ifconf = {};
+ ifconf.ifc_len = sizeof(ifr) - 1; // One byte too few.
+ ifconf.ifc_ifcu.ifcu_req = &ifr;
+
+ ASSERT_THAT(ioctl(fd->get(), SIOCGIFCONF, &ifconf), SyscallSucceeds());
+ ASSERT_EQ(ifconf.ifc_len, 0);
+ ASSERT_EQ(ifr.ifr_name[0], '\0'); // Nothing is returned.
+
+ ifconf.ifc_len = sizeof(ifreq);
+ ASSERT_THAT(ioctl(fd->get(), SIOCGIFCONF, &ifconf), SyscallSucceeds());
+ ASSERT_GT(ifconf.ifc_len, 0);
+ ASSERT_NE(ifr.ifr_name[0], '\0'); // An interface can now be returned.
+}
+
+TEST_P(IoctlTestSIOCGIFCONF, ValidateLoopbackIsPresent) {
+ auto fd = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+ struct ifconf ifconf = {};
+ struct ifreq ifr[10] = {}; // Storage for up to 10 interfaces.
+
+ ifconf.ifc_req = ifr;
+ ifconf.ifc_len = sizeof(ifr);
+
+ ASSERT_THAT(ioctl(fd->get(), SIOCGIFCONF, &ifconf), SyscallSucceeds());
+ size_t num_if = ifconf.ifc_len / sizeof(struct ifreq);
+
+ // We should have at least one interface.
+ ASSERT_GE(num_if, 1);
+
+ // One of the interfaces should be a loopback.
+ bool found_loopback = false;
+ for (size_t i = 0; i < num_if; ++i) {
+ if (strcmp(ifr[i].ifr_name, "lo") == 0) {
+ // SIOCGIFCONF returns the ipv4 address of the interface, let's check it.
+ ASSERT_EQ(ifr[i].ifr_addr.sa_family, AF_INET);
+
+ // Validate the address is correct for loopback.
+ sockaddr_in* sin = reinterpret_cast<sockaddr_in*>(&ifr[i].ifr_addr);
+ ASSERT_EQ(htonl(sin->sin_addr.s_addr), INADDR_LOOPBACK);
+
+ found_loopback = true;
+ break;
+ }
+ }
+ ASSERT_TRUE(found_loopback);
+}
+
+std::vector<SocketKind> IoctlSocketTypes() {
+ return {SimpleSocket(AF_UNIX, SOCK_STREAM, 0),
+ SimpleSocket(AF_UNIX, SOCK_DGRAM, 0),
+ SimpleSocket(AF_INET, SOCK_STREAM, 0),
+ SimpleSocket(AF_INET6, SOCK_STREAM, 0),
+ SimpleSocket(AF_INET, SOCK_DGRAM, 0),
+ SimpleSocket(AF_INET6, SOCK_DGRAM, 0)};
+}
+
+INSTANTIATE_TEST_CASE_P(IoctlTest, IoctlTestSIOCGIFCONF,
+ ::testing::ValuesIn(IoctlSocketTypes()));
+
+} // namespace
+
+TEST_F(IoctlTest, FIOGETOWNSucceeds) {
+ const FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
+ Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0));
+
+ int get = -1;
+ ASSERT_THAT(ioctl(s.get(), FIOGETOWN, &get), SyscallSucceeds());
+ EXPECT_EQ(get, 0);
+}
+
+TEST_F(IoctlTest, SIOCGPGRPSucceeds) {
+ const FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
+ Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0));
+
+ int get = -1;
+ ASSERT_THAT(ioctl(s.get(), SIOCGPGRP, &get), SyscallSucceeds());
+ EXPECT_EQ(get, 0);
+}
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/ip_socket_test_util.cc b/test/syscalls/linux/ip_socket_test_util.cc
new file mode 100644
index 000000000..1659d3d83
--- /dev/null
+++ b/test/syscalls/linux/ip_socket_test_util.cc
@@ -0,0 +1,78 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/ip_socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+std::string DescribeSocketType(int type) {
+ return absl::StrCat(((type & SOCK_NONBLOCK) != 0) ? "non-blocking " : "",
+ ((type & SOCK_CLOEXEC) != 0) ? "close-on-exec " : "");
+}
+
+} // namespace
+
+SocketPairKind IPv6TCPAcceptBindSocketPair(int type) {
+ std::string description =
+ absl::StrCat(DescribeSocketType(type), "IPv6 TCP socket");
+ return SocketPairKind{
+ description, TCPAcceptBindSocketPairCreator(AF_INET6, type | SOCK_STREAM,
+ 0, /* dual_stack = */ false)};
+}
+
+SocketPairKind IPv4TCPAcceptBindSocketPair(int type) {
+ std::string description =
+ absl::StrCat(DescribeSocketType(type), "IPv4 TCP socket");
+ return SocketPairKind{
+ description, TCPAcceptBindSocketPairCreator(AF_INET, type | SOCK_STREAM,
+ 0, /* dual_stack = */ false)};
+}
+
+SocketPairKind DualStackTCPAcceptBindSocketPair(int type) {
+ std::string description =
+ absl::StrCat(DescribeSocketType(type), "dual stack TCP socket");
+ return SocketPairKind{
+ description, TCPAcceptBindSocketPairCreator(AF_INET6, type | SOCK_STREAM,
+ 0, /* dual_stack = */ true)};
+}
+
+SocketPairKind IPv6UDPBidirectionalBindSocketPair(int type) {
+ std::string description =
+ absl::StrCat(DescribeSocketType(type), "IPv6 UDP socket");
+ return SocketPairKind{description, UDPBidirectionalBindSocketPairCreator(
+ AF_INET6, type | SOCK_DGRAM, 0,
+ /* dual_stack = */ false)};
+}
+
+SocketPairKind IPv4UDPBidirectionalBindSocketPair(int type) {
+ std::string description =
+ absl::StrCat(DescribeSocketType(type), "IPv4 UDP socket");
+ return SocketPairKind{description, UDPBidirectionalBindSocketPairCreator(
+ AF_INET, type | SOCK_DGRAM, 0,
+ /* dual_stack = */ false)};
+}
+
+SocketPairKind DualStackUDPBidirectionalBindSocketPair(int type) {
+ std::string description =
+ absl::StrCat(DescribeSocketType(type), "dual stack UDP socket");
+ return SocketPairKind{description, UDPBidirectionalBindSocketPairCreator(
+ AF_INET6, type | SOCK_DGRAM, 0,
+ /* dual_stack = */ true)};
+}
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/ip_socket_test_util.h b/test/syscalls/linux/ip_socket_test_util.h
new file mode 100644
index 000000000..1e1400ecd
--- /dev/null
+++ b/test/syscalls/linux/ip_socket_test_util.h
@@ -0,0 +1,57 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_IP_SOCKET_TEST_UTIL_H_
+#define GVISOR_TEST_SYSCALLS_IP_SOCKET_TEST_UTIL_H_
+
+#include <string>
+#include "test/syscalls/linux/socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// IPv6TCPAcceptBindSocketPair returns a SocketPairKind that represents
+// SocketPairs created with bind() and accept() syscalls with AF_INET6 and the
+// given type bound to the IPv6 loopback.
+SocketPairKind IPv6TCPAcceptBindSocketPair(int type);
+
+// IPv4TCPAcceptBindSocketPair returns a SocketPairKind that represents
+// SocketPairs created with bind() and accept() syscalls with AF_INET and the
+// given type bound to the IPv4 loopback.
+SocketPairKind IPv4TCPAcceptBindSocketPair(int type);
+
+// DualStackTCPAcceptBindSocketPair returns a SocketPairKind that represents
+// SocketPairs created with bind() and accept() syscalls with AF_INET6 and the
+// given type bound to the IPv4 loopback.
+SocketPairKind DualStackTCPAcceptBindSocketPair(int type);
+
+// IPv6UDPBidirectionalBindSocketPair returns a SocketPairKind that represents
+// SocketPairs created with bind() and connect() syscalls with AF_INET6 and the
+// given type bound to the IPv6 loopback.
+SocketPairKind IPv6UDPBidirectionalBindSocketPair(int type);
+
+// IPv4UDPBidirectionalBindSocketPair returns a SocketPairKind that represents
+// SocketPairs created with bind() and connect() syscalls with AF_INET and the
+// given type bound to the IPv4 loopback.
+SocketPairKind IPv4UDPBidirectionalBindSocketPair(int type);
+
+// DualStackUDPBidirectionalBindSocketPair returns a SocketPairKind that
+// represents SocketPairs created with bind() and connect() syscalls with
+// AF_INET6 and the given type bound to the IPv4 loopback.
+SocketPairKind DualStackUDPBidirectionalBindSocketPair(int type);
+
+} // namespace testing
+} // namespace gvisor
+
+#endif // GVISOR_TEST_SYSCALLS_IP_SOCKET_TEST_UTIL_H_
diff --git a/test/syscalls/linux/itimer.cc b/test/syscalls/linux/itimer.cc
new file mode 100644
index 000000000..ee5871cbe
--- /dev/null
+++ b/test/syscalls/linux/itimer.cc
@@ -0,0 +1,342 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <signal.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <time.h>
+
+#include <atomic>
+#include <functional>
+#include <iostream>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/strings/string_view.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/logging.h"
+#include "test/util/multiprocess_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+#include "test/util/timer_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+constexpr char kSIGALRMToMainThread[] = "--itimer_sigarlm_to_main_thread";
+constexpr char kSIGPROFFairnessActive[] = "--itimer_sigprof_fairness_active";
+constexpr char kSIGPROFFairnessIdle[] = "--itimer_sigprof_fairness_idle";
+
+// Time period to be set for the itimers.
+constexpr absl::Duration kPeriod = absl::Milliseconds(25);
+// Total amount of time to spend per thread.
+constexpr absl::Duration kTestDuration = absl::Seconds(20);
+// Amount of spin iterations to perform as the minimum work item per thread.
+// Chosen to be sub-millisecond range.
+constexpr int kIterations = 10000000;
+// Allow deviation in the number of samples.
+constexpr double kNumSamplesDeviationRatio = 0.2;
+constexpr double kNumSamplesMinRatio = 0.5;
+
+TEST(ItimerTest, ItimervalUpdatedBeforeExpiration) {
+ constexpr int kSleepSecs = 10;
+ constexpr int kAlarmSecs = 15;
+ static_assert(
+ kSleepSecs < kAlarmSecs,
+ "kSleepSecs must be less than kAlarmSecs for the test to be meaningful");
+ constexpr int kMaxRemainingSecs = kAlarmSecs - kSleepSecs;
+
+ // Install a no-op handler for SIGALRM.
+ struct sigaction sa = {};
+ sigfillset(&sa.sa_mask);
+ sa.sa_handler = +[](int signo) {};
+ auto const cleanup_sa =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGALRM, sa));
+
+ // Set an itimer-based alarm for kAlarmSecs from now.
+ struct itimerval itv = {};
+ itv.it_value.tv_sec = kAlarmSecs;
+ auto const cleanup_itimer =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedItimer(ITIMER_REAL, itv));
+
+ // After sleeping for kSleepSecs, the itimer value should reflect the elapsed
+ // time even if it hasn't expired.
+ absl::SleepFor(absl::Seconds(kSleepSecs));
+ ASSERT_THAT(getitimer(ITIMER_REAL, &itv), SyscallSucceeds());
+ EXPECT_TRUE(
+ itv.it_value.tv_sec < kMaxRemainingSecs ||
+ (itv.it_value.tv_sec == kMaxRemainingSecs && itv.it_value.tv_usec == 0))
+ << "Remaining time: " << itv.it_value.tv_sec << " seconds + "
+ << itv.it_value.tv_usec << " microseconds";
+}
+
+ABSL_CONST_INIT static thread_local std::atomic_int signal_test_num_samples =
+ ATOMIC_VAR_INIT(0);
+
+void SignalTestSignalHandler(int /*signum*/) { signal_test_num_samples++; }
+
+struct SignalTestResult {
+ int expected_total;
+ int main_thread_samples;
+ std::vector<int> worker_samples;
+};
+
+std::ostream& operator<<(std::ostream& os, const SignalTestResult& r) {
+ os << "{expected_total: " << r.expected_total
+ << ", main_thread_samples: " << r.main_thread_samples
+ << ", worker_samples: [";
+ bool first = true;
+ for (int sample : r.worker_samples) {
+ if (!first) {
+ os << ", ";
+ }
+ os << sample;
+ first = false;
+ }
+ os << "]}";
+ return os;
+}
+
+// Starts two worker threads and itimer id and measures the number of signal
+// delivered to each thread.
+SignalTestResult ItimerSignalTest(int id, clock_t main_clock,
+ clock_t worker_clock, int signal,
+ absl::Duration sleep) {
+ signal_test_num_samples = 0;
+
+ struct sigaction sa = {};
+ sa.sa_handler = &SignalTestSignalHandler;
+ sa.sa_flags = SA_RESTART;
+ sigemptyset(&sa.sa_mask);
+ auto sigaction_cleanup = std::move(ScopedSigaction(signal, sa).ValueOrDie());
+
+ int socketfds[2];
+ TEST_PCHECK(socketpair(AF_UNIX, SOCK_STREAM, 0, socketfds) == 0);
+
+ // Do the spinning in the workers.
+ std::function<void*(int)> work = [&](int socket_fd) {
+ FileDescriptor fd(socket_fd);
+
+ absl::Time finish = Now(worker_clock) + kTestDuration;
+ while (Now(worker_clock) < finish) {
+ // Blocked on read.
+ char c;
+ RetryEINTR(read)(fd.get(), &c, 1);
+ for (int i = 0; i < kIterations; i++) {
+ // Ensure compiler won't optimize this loop away.
+ asm("");
+ }
+
+ if (sleep != absl::ZeroDuration()) {
+ // Sleep so that the entire process is idle for a while.
+ absl::SleepFor(sleep);
+ }
+
+ // Unblock the other thread.
+ RetryEINTR(write)(fd.get(), &c, 1);
+ }
+
+ return reinterpret_cast<void*>(signal_test_num_samples.load());
+ };
+
+ ScopedThread th1(
+ static_cast<std::function<void*()>>(std::bind(work, socketfds[0])));
+ ScopedThread th2(
+ static_cast<std::function<void*()>>(std::bind(work, socketfds[1])));
+
+ absl::Time start = Now(main_clock);
+ // Start the timer.
+ struct itimerval timer = {};
+ timer.it_value = absl::ToTimeval(kPeriod);
+ timer.it_interval = absl::ToTimeval(kPeriod);
+ auto cleanup_itimer = std::move(ScopedItimer(id, timer).ValueOrDie());
+
+ // Unblock th1.
+ //
+ // N.B. th2 owns socketfds[1] but can't close it until it unblocks.
+ char c = 0;
+ TEST_CHECK(write(socketfds[1], &c, 1) == 1);
+
+ SignalTestResult result;
+
+ // Wait for the workers to be done and collect their sample counts.
+ result.worker_samples.push_back(reinterpret_cast<int64_t>(th1.Join()));
+ result.worker_samples.push_back(reinterpret_cast<int64_t>(th2.Join()));
+ cleanup_itimer.Release()();
+ result.expected_total = (Now(main_clock) - start) / kPeriod;
+ result.main_thread_samples = signal_test_num_samples.load();
+
+ return result;
+}
+
+int TestSIGALRMToMainThread() {
+ SignalTestResult result =
+ ItimerSignalTest(ITIMER_REAL, CLOCK_REALTIME, CLOCK_REALTIME, SIGALRM,
+ absl::ZeroDuration());
+
+ std::cerr << "result: " << result << std::endl;
+
+ // ITIMER_REAL-generated SIGALRMs prefer to deliver to the thread group leader
+ // (but don't guarantee it), so we expect to see most samples on the main
+ // thread.
+ //
+ // Linux only guarantees timers will never expire before the requested time.
+ // Thus, we only check the upper bound and also it at least have one sample.
+ TEST_CHECK(result.main_thread_samples <= result.expected_total);
+ TEST_CHECK(result.main_thread_samples > 0);
+ for (int num : result.worker_samples) {
+ TEST_CHECK_MSG(num <= 50, "worker received too many samples");
+ }
+
+ return 0;
+}
+
+// Random save/restore is disabled as it introduces additional latency and
+// unpredictable distribution patterns.
+TEST(ItimerTest, DeliversSIGALRMToMainThread_NoRandomSave) {
+ pid_t child;
+ int execve_errno;
+ auto kill = ASSERT_NO_ERRNO_AND_VALUE(
+ ForkAndExec("/proc/self/exe", {"/proc/self/exe", kSIGALRMToMainThread},
+ {}, &child, &execve_errno));
+ EXPECT_EQ(0, execve_errno);
+
+ int status;
+ EXPECT_THAT(RetryEINTR(waitpid)(child, &status, 0),
+ SyscallSucceedsWithValue(child));
+
+ // Not required anymore.
+ kill.Release();
+
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) << status;
+}
+
+// Signals are delivered to threads fairly.
+//
+// sleep indicates how long to sleep worker threads each iteration to make the
+// entire process idle.
+int TestSIGPROFFairness(absl::Duration sleep) {
+ SignalTestResult result =
+ ItimerSignalTest(ITIMER_PROF, CLOCK_PROCESS_CPUTIME_ID,
+ CLOCK_THREAD_CPUTIME_ID, SIGPROF, sleep);
+
+ std::cerr << "result: " << result << std::endl;
+
+ // The number of samples on the main thread should be very low as it did
+ // nothing.
+ TEST_CHECK(result.main_thread_samples < 60);
+
+ // Both workers should get roughly equal number of samples.
+ TEST_CHECK(result.worker_samples.size() == 2);
+
+ TEST_CHECK(result.expected_total > 0);
+
+ // In an ideal world each thread would get exactly 50% of the signals,
+ // but since that's unlikely to happen we allow for them to get no less than
+ // kNumSamplesDeviationRatio of the total observed samples.
+ TEST_CHECK_MSG(std::abs(result.worker_samples[0] - result.worker_samples[1]) <
+ ((result.worker_samples[0] + result.worker_samples[1]) *
+ kNumSamplesDeviationRatio),
+ "one worker received disproportionate share of samples");
+
+ return 0;
+}
+
+// Random save/restore is disabled as it introduces additional latency and
+// unpredictable distribution patterns.
+TEST(ItimerTest, DeliversSIGPROFToThreadsRoughlyFairlyActive_NoRandomSave) {
+ pid_t child;
+ int execve_errno;
+ auto kill = ASSERT_NO_ERRNO_AND_VALUE(
+ ForkAndExec("/proc/self/exe", {"/proc/self/exe", kSIGPROFFairnessActive},
+ {}, &child, &execve_errno));
+ EXPECT_EQ(0, execve_errno);
+
+ int status;
+ EXPECT_THAT(RetryEINTR(waitpid)(child, &status, 0),
+ SyscallSucceedsWithValue(child));
+
+ // Not required anymore.
+ kill.Release();
+
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << "Exited with code: " << status;
+}
+
+// Random save/restore is disabled as it introduces additional latency and
+// unpredictable distribution patterns.
+TEST(ItimerTest, DeliversSIGPROFToThreadsRoughlyFairlyIdle_NoRandomSave) {
+ pid_t child;
+ int execve_errno;
+ auto kill = ASSERT_NO_ERRNO_AND_VALUE(
+ ForkAndExec("/proc/self/exe", {"/proc/self/exe", kSIGPROFFairnessIdle},
+ {}, &child, &execve_errno));
+ EXPECT_EQ(0, execve_errno);
+
+ int status;
+ EXPECT_THAT(RetryEINTR(waitpid)(child, &status, 0),
+ SyscallSucceedsWithValue(child));
+
+ // Not required anymore.
+ kill.Release();
+
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << "Exited with code: " << status;
+}
+
+} // namespace
+} // namespace testing
+} // namespace gvisor
+
+namespace {
+void MaskSIGPIPE() {
+ // Always mask SIGPIPE as it's common and tests aren't expected to handle it.
+ // We don't take the TestInit() path so we must do this manually.
+ struct sigaction sa = {};
+ sa.sa_handler = SIG_IGN;
+ TEST_CHECK(sigaction(SIGPIPE, &sa, nullptr) == 0);
+}
+} // namespace
+
+int main(int argc, char** argv) {
+ // These tests require no background threads, so check for them before
+ // TestInit.
+ for (int i = 0; i < argc; i++) {
+ absl::string_view arg(argv[i]);
+
+ if (arg == gvisor::testing::kSIGALRMToMainThread) {
+ MaskSIGPIPE();
+ return gvisor::testing::TestSIGALRMToMainThread();
+ }
+ if (arg == gvisor::testing::kSIGPROFFairnessActive) {
+ MaskSIGPIPE();
+ return gvisor::testing::TestSIGPROFFairness(absl::ZeroDuration());
+ }
+ if (arg == gvisor::testing::kSIGPROFFairnessIdle) {
+ MaskSIGPIPE();
+ return gvisor::testing::TestSIGPROFFairness(absl::Milliseconds(10));
+ }
+ }
+
+ gvisor::testing::TestInit(&argc, &argv);
+
+ return RUN_ALL_TESTS();
+}
diff --git a/test/syscalls/linux/kill.cc b/test/syscalls/linux/kill.cc
new file mode 100644
index 000000000..18ba8fb16
--- /dev/null
+++ b/test/syscalls/linux/kill.cc
@@ -0,0 +1,380 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <cerrno>
+#include <csignal>
+
+#include "gtest/gtest.h"
+#include "absl/synchronization/mutex.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/capability_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/logging.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+DEFINE_int32(scratch_uid, 65534, "scratch UID");
+DEFINE_int32(scratch_gid, 65534, "scratch GID");
+
+using ::testing::Ge;
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(KillTest, CanKillValidPid) {
+ // If pid is positive, then signal sig is sent to the process with the ID
+ // specified by pid.
+ EXPECT_THAT(kill(getpid(), 0), SyscallSucceeds());
+ // If pid equals 0, then sig is sent to every process in the process group of
+ // the calling process.
+ EXPECT_THAT(kill(0, 0), SyscallSucceeds());
+
+ ScopedThread([] { EXPECT_THAT(kill(gettid(), 0), SyscallSucceeds()); });
+}
+
+void SigHandler(int sig, siginfo_t* info, void* context) { _exit(0); }
+
+// If pid equals -1, then sig is sent to every process for which the calling
+// process has permission to send signals, except for process 1 (init).
+TEST(KillTest, CanKillAllPIDs) {
+ int pipe_fds[2];
+ ASSERT_THAT(pipe(pipe_fds), SyscallSucceeds());
+ FileDescriptor read_fd(pipe_fds[0]);
+ FileDescriptor write_fd(pipe_fds[1]);
+
+ pid_t pid = fork();
+ if (pid == 0) {
+ read_fd.reset();
+
+ struct sigaction sa;
+ sa.sa_sigaction = SigHandler;
+ sigfillset(&sa.sa_mask);
+ sa.sa_flags = SA_SIGINFO;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGWINCH, sa));
+
+ // Indicate to the parent that we're ready.
+ write_fd.reset();
+
+ // Wait until we get the signal from the parent.
+ while (true) {
+ pause();
+ }
+ }
+
+ EXPECT_THAT(pid, SyscallSucceeds());
+
+ write_fd.reset();
+
+ // Wait for the child to indicate that it's unmasked the signal by closing
+ // the write end.
+ char buf;
+ ASSERT_THAT(ReadFd(read_fd.get(), &buf, 1), SyscallSucceedsWithValue(0));
+
+ // Signal the child and wait for it to die with status 0, indicating that
+ // it got the expected signal.
+ EXPECT_THAT(kill(-1, SIGWINCH), SyscallSucceeds());
+
+ int status;
+ ASSERT_THAT(RetryEINTR(waitpid)(pid, &status, 0),
+ SyscallSucceedsWithValue(pid));
+ EXPECT_TRUE(WIFEXITED(status));
+ EXPECT_EQ(0, WEXITSTATUS(status));
+}
+
+TEST(KillTest, CannotKillInvalidPID) {
+ // We need an unused pid to verify that kill fails when given one.
+ //
+ // There is no way to guarantee that a PID is unused, but the PID of a
+ // recently exited process likely won't be reused soon.
+ pid_t fake_pid = fork();
+ if (fake_pid == 0) {
+ _exit(0);
+ }
+
+ EXPECT_THAT(fake_pid, SyscallSucceeds());
+
+ int status;
+ ASSERT_THAT(RetryEINTR(waitpid)(fake_pid, &status, 0),
+ SyscallSucceedsWithValue(fake_pid));
+ EXPECT_TRUE(WIFEXITED(status));
+ EXPECT_EQ(0, WEXITSTATUS(status));
+
+ EXPECT_THAT(kill(fake_pid, 0), SyscallFailsWithErrno(ESRCH));
+}
+
+TEST(KillTest, CannotUseInvalidSignal) {
+ EXPECT_THAT(kill(getpid(), 200), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(KillTest, CanKillRemoteProcess) {
+ pid_t pid = fork();
+ if (pid == 0) {
+ while (true) {
+ pause();
+ }
+ }
+
+ EXPECT_THAT(pid, SyscallSucceeds());
+
+ EXPECT_THAT(kill(pid, SIGKILL), SyscallSucceeds());
+
+ int status;
+ ASSERT_THAT(RetryEINTR(waitpid)(pid, &status, 0),
+ SyscallSucceedsWithValue(pid));
+ EXPECT_TRUE(WIFSIGNALED(status));
+ EXPECT_EQ(SIGKILL, WTERMSIG(status));
+}
+
+TEST(KillTest, CanKillOwnProcess) {
+ EXPECT_THAT(kill(getpid(), 0), SyscallSucceeds());
+}
+
+// Verify that you can kill a process even using a tid from a thread other than
+// the group leader.
+TEST(KillTest, CannotKillTid) {
+ pid_t tid;
+ bool tid_available = false;
+ bool finished = false;
+ absl::Mutex mu;
+ ScopedThread t([&] {
+ mu.Lock();
+ tid = gettid();
+ tid_available = true;
+ mu.Await(absl::Condition(&finished));
+ mu.Unlock();
+ });
+ mu.LockWhen(absl::Condition(&tid_available));
+ EXPECT_THAT(kill(tid, 0), SyscallSucceeds());
+ finished = true;
+ mu.Unlock();
+}
+
+TEST(KillTest, SetPgid) {
+ for (int i = 0; i < 10; i++) {
+ // The following in the normal pattern for creating a new process group.
+ // Both the parent and child process will call setpgid in order to avoid any
+ // race conditions. We do this ten times to catch races.
+ pid_t pid = fork();
+ if (pid == 0) {
+ setpgid(0, 0);
+ while (true) {
+ pause();
+ }
+ }
+
+ EXPECT_THAT(pid, SyscallSucceeds());
+
+ // Set the child's group and exit.
+ ASSERT_THAT(setpgid(pid, pid), SyscallSucceeds());
+ EXPECT_THAT(kill(pid, SIGKILL), SyscallSucceeds());
+
+ int status;
+ EXPECT_THAT(RetryEINTR(waitpid)(-pid, &status, 0),
+ SyscallSucceedsWithValue(pid));
+ EXPECT_TRUE(WIFSIGNALED(status));
+ EXPECT_EQ(SIGKILL, WTERMSIG(status));
+ }
+}
+
+TEST(KillTest, ProcessGroups) {
+ // Fork a new child.
+ //
+ // other_child is used as a placeholder process. We use this PID as our "does
+ // not exist" process group to ensure some amount of safety. (It is still
+ // possible to violate this assumption, but extremely unlikely.)
+ pid_t child = fork();
+ if (child == 0) {
+ while (true) {
+ pause();
+ }
+ }
+ EXPECT_THAT(child, SyscallSucceeds());
+
+ pid_t other_child = fork();
+ if (other_child == 0) {
+ while (true) {
+ pause();
+ }
+ }
+
+ // Ensure the kill does not succeed without the new group.
+ EXPECT_THAT(kill(-child, SIGKILL), SyscallFailsWithErrno(ESRCH));
+
+ // Put the child in its own process group.
+ ASSERT_THAT(setpgid(child, child), SyscallSucceeds());
+
+ // This should be not allowed: you can only create a new group with the same
+ // id or join an existing one. The other_child group should not exist.
+ ASSERT_THAT(setpgid(child, other_child), SyscallFailsWithErrno(EPERM));
+
+ // Done with other_child; kill it.
+ EXPECT_THAT(kill(other_child, SIGKILL), SyscallSucceeds());
+ int status;
+ EXPECT_THAT(RetryEINTR(waitpid)(other_child, &status, 0), SyscallSucceeds());
+
+ // Linux returns success for the no-op call.
+ ASSERT_THAT(setpgid(child, child), SyscallSucceeds());
+
+ // Kill the child's process group.
+ ASSERT_THAT(kill(-child, SIGKILL), SyscallSucceeds());
+
+ // Wait on the process group; ensure that the signal was as expected.
+ EXPECT_THAT(RetryEINTR(waitpid)(-child, &status, 0),
+ SyscallSucceedsWithValue(child));
+ EXPECT_TRUE(WIFSIGNALED(status));
+ EXPECT_EQ(SIGKILL, WTERMSIG(status));
+
+ // Try to kill the process group again; ensure that the wait fails.
+ EXPECT_THAT(kill(-child, SIGKILL), SyscallFailsWithErrno(ESRCH));
+ EXPECT_THAT(RetryEINTR(waitpid)(-child, &status, 0),
+ SyscallFailsWithErrno(ECHILD));
+}
+
+TEST(KillTest, ChildDropsPrivsCannotKill) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SETUID)));
+
+ int uid = FLAGS_scratch_uid;
+ int gid = FLAGS_scratch_gid;
+
+ // Create the child that drops privileges and tries to kill the parent.
+ pid_t pid = fork();
+ if (pid == 0) {
+ TEST_PCHECK(setresgid(gid, gid, gid) == 0);
+ MaybeSave();
+
+ TEST_PCHECK(setresuid(uid, uid, uid) == 0);
+ MaybeSave();
+
+ // setresuid should have dropped CAP_KILL. Make sure.
+ TEST_CHECK(!HaveCapability(CAP_KILL).ValueOrDie());
+
+ // Try to kill parent with every signal-sending syscall possible.
+ pid_t parent = getppid();
+
+ TEST_CHECK(kill(parent, SIGKILL) < 0);
+ TEST_PCHECK_MSG(errno == EPERM, "kill failed with wrong errno");
+ MaybeSave();
+
+ TEST_CHECK(tgkill(parent, parent, SIGKILL) < 0);
+ TEST_PCHECK_MSG(errno == EPERM, "tgkill failed with wrong errno");
+ MaybeSave();
+
+ TEST_CHECK(syscall(SYS_tkill, parent, SIGKILL) < 0);
+ TEST_PCHECK_MSG(errno == EPERM, "tkill failed with wrong errno");
+ MaybeSave();
+
+ siginfo_t uinfo;
+ uinfo.si_code = -1; // SI_QUEUE (allowed).
+
+ TEST_CHECK(syscall(SYS_rt_sigqueueinfo, parent, SIGKILL, &uinfo) < 0);
+ TEST_PCHECK_MSG(errno == EPERM, "rt_sigqueueinfo failed with wrong errno");
+ MaybeSave();
+
+ TEST_CHECK(syscall(SYS_rt_tgsigqueueinfo, parent, parent, SIGKILL, &uinfo) <
+ 0);
+ TEST_PCHECK_MSG(errno == EPERM, "rt_sigqueueinfo failed with wrong errno");
+ MaybeSave();
+
+ _exit(0);
+ }
+
+ EXPECT_THAT(pid, SyscallSucceeds());
+
+ int status;
+ EXPECT_THAT(RetryEINTR(waitpid)(pid, &status, 0),
+ SyscallSucceedsWithValue(pid));
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << "status = " << status;
+}
+
+TEST(KillTest, CanSIGCONTSameSession) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SETUID)));
+
+ pid_t stopped_child = fork();
+ if (stopped_child == 0) {
+ raise(SIGSTOP);
+ _exit(0);
+ }
+
+ EXPECT_THAT(stopped_child, SyscallSucceeds());
+
+ // Put the child in its own process group. The child and parent process
+ // groups also share a session.
+ ASSERT_THAT(setpgid(stopped_child, stopped_child), SyscallSucceeds());
+
+ // Make sure child stopped.
+ int status;
+ EXPECT_THAT(RetryEINTR(waitpid)(stopped_child, &status, WUNTRACED),
+ SyscallSucceedsWithValue(stopped_child));
+ EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP)
+ << "status " << status;
+
+ int uid = FLAGS_scratch_uid;
+ int gid = FLAGS_scratch_gid;
+
+ // Drop privileges only in child process, or else this parent process won't be
+ // able to open some log files after the test ends.
+ pid_t other_child = fork();
+ if (other_child == 0) {
+ // Drop privileges.
+ TEST_PCHECK(setresgid(gid, gid, gid) == 0);
+ MaybeSave();
+
+ TEST_PCHECK(setresuid(uid, uid, uid) == 0);
+ MaybeSave();
+
+ // setresuid should have dropped CAP_KILL.
+ TEST_CHECK(!HaveCapability(CAP_KILL).ValueOrDie());
+
+ // Child 2 and child should now not share a thread group and any UIDs.
+ // Child 2 should have no privileges. That means any signal other than
+ // SIGCONT should fail.
+ TEST_CHECK(kill(stopped_child, SIGKILL) < 0);
+ TEST_PCHECK_MSG(errno == EPERM, "kill failed with wrong errno");
+ MaybeSave();
+
+ TEST_PCHECK(kill(stopped_child, SIGCONT) == 0);
+ MaybeSave();
+
+ _exit(0);
+ }
+
+ EXPECT_THAT(stopped_child, SyscallSucceeds());
+
+ // Make sure child exited normally.
+ EXPECT_THAT(RetryEINTR(waitpid)(stopped_child, &status, 0),
+ SyscallSucceedsWithValue(stopped_child));
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << "status " << status;
+
+ // Make sure other_child exited normally.
+ EXPECT_THAT(RetryEINTR(waitpid)(other_child, &status, 0),
+ SyscallSucceedsWithValue(other_child));
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << "status " << status;
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/link.cc b/test/syscalls/linux/link.cc
new file mode 100644
index 000000000..ed74437bc
--- /dev/null
+++ b/test/syscalls/linux/link.cc
@@ -0,0 +1,291 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <string>
+
+#include "gtest/gtest.h"
+#include "absl/strings/str_cat.h"
+#include "test/util/capability_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+DEFINE_int32(scratch_uid, 65534, "scratch UID");
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// IsSameFile returns true if both filenames have the same device and inode.
+bool IsSameFile(const std::string& f1, const std::string& f2) {
+ // Use lstat rather than stat, so that symlinks are not followed.
+ struct stat stat1 = {};
+ EXPECT_THAT(lstat(f1.c_str(), &stat1), SyscallSucceeds());
+ struct stat stat2 = {};
+ EXPECT_THAT(lstat(f2.c_str(), &stat2), SyscallSucceeds());
+
+ return stat1.st_dev == stat2.st_dev && stat1.st_ino == stat2.st_ino;
+}
+
+TEST(LinkTest, CanCreateLinkFile) {
+ auto oldfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ const std::string newname = NewTempAbsPath();
+
+ // Get the initial link count.
+ uint64_t initial_link_count = ASSERT_NO_ERRNO_AND_VALUE(Links(oldfile.path()));
+
+ EXPECT_THAT(link(oldfile.path().c_str(), newname.c_str()), SyscallSucceeds());
+
+ EXPECT_TRUE(IsSameFile(oldfile.path(), newname));
+
+ // Link count should be incremented.
+ EXPECT_THAT(Links(oldfile.path()),
+ IsPosixErrorOkAndHolds(initial_link_count + 1));
+
+ // Delete the link.
+ EXPECT_THAT(unlink(newname.c_str()), SyscallSucceeds());
+
+ // Link count should be back to initial.
+ EXPECT_THAT(Links(oldfile.path()),
+ IsPosixErrorOkAndHolds(initial_link_count));
+}
+
+TEST(LinkTest, PermissionDenied) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_FOWNER)));
+
+ // Make the file "unsafe" to link by making it only readable, but not
+ // writable.
+ const auto oldfile =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileMode(0400));
+ const std::string newname = NewTempAbsPath();
+
+ // Do setuid in a separate thread so that after finishing this test, the
+ // process can still open files the test harness created before starting this
+ // test. Otherwise, the files are created by root (UID before the test), but
+ // cannot be opened by the `uid` set below after the test. After calling
+ // setuid(non-zero-UID), there is no way to get root privileges back.
+ ScopedThread([&] {
+ // Use syscall instead of glibc setuid wrapper because we want this setuid
+ // call to only apply to this task. POSIX threads, however, require that all
+ // threads have the same UIDs, so using the setuid wrapper sets all threads'
+ // real UID.
+ // Also drops capabilities.
+ EXPECT_THAT(syscall(SYS_setuid, FLAGS_scratch_uid), SyscallSucceeds());
+
+ EXPECT_THAT(link(oldfile.path().c_str(), newname.c_str()),
+ SyscallFailsWithErrno(EPERM));
+ });
+}
+
+TEST(LinkTest, CannotLinkDirectory) {
+ auto olddir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const std::string newdir = NewTempAbsPath();
+
+ EXPECT_THAT(link(olddir.path().c_str(), newdir.c_str()),
+ SyscallFailsWithErrno(EPERM));
+
+ EXPECT_THAT(rmdir(olddir.path().c_str()), SyscallSucceeds());
+}
+
+TEST(LinkTest, CannotLinkWithSlash) {
+ auto oldfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ // Put a final "/" on newname.
+ const std::string newname = absl::StrCat(NewTempAbsPath(), "/");
+
+ EXPECT_THAT(link(oldfile.path().c_str(), newname.c_str()),
+ SyscallFailsWithErrno(ENOENT));
+}
+
+TEST(LinkTest, OldnameIsEmpty) {
+ const std::string newname = NewTempAbsPath();
+ EXPECT_THAT(link("", newname.c_str()), SyscallFailsWithErrno(ENOENT));
+}
+
+TEST(LinkTest, OldnameDoesNotExist) {
+ const std::string oldname = NewTempAbsPath();
+ const std::string newname = NewTempAbsPath();
+ EXPECT_THAT(link("", newname.c_str()), SyscallFailsWithErrno(ENOENT));
+}
+
+TEST(LinkTest, NewnameCannotExist) {
+ const std::string newname =
+ JoinPath(GetAbsoluteTestTmpdir(), "thisdoesnotexist", "foo");
+ EXPECT_THAT(link("/thisdoesnotmatter", newname.c_str()),
+ SyscallFailsWithErrno(ENOENT));
+}
+
+TEST(LinkTest, WithOldDirFD) {
+ const std::string oldname_parent = NewTempAbsPath();
+ const std::string oldname_base = "child";
+ const std::string oldname = JoinPath(oldname_parent, oldname_base);
+ const std::string newname = NewTempAbsPath();
+
+ // Create oldname_parent directory, and get an FD.
+ ASSERT_THAT(mkdir(oldname_parent.c_str(), 0777), SyscallSucceeds());
+ const FileDescriptor oldname_parent_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(oldname_parent, O_DIRECTORY | O_RDONLY));
+
+ // Create oldname file.
+ const FileDescriptor oldname_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(oldname, O_CREAT | O_RDWR, 0666));
+
+ // Link oldname to newname, using oldname_parent_fd.
+ EXPECT_THAT(linkat(oldname_parent_fd.get(), oldname_base.c_str(), AT_FDCWD,
+ newname.c_str(), 0),
+ SyscallSucceeds());
+
+ EXPECT_TRUE(IsSameFile(oldname, newname));
+
+ EXPECT_THAT(unlink(newname.c_str()), SyscallSucceeds());
+ EXPECT_THAT(unlink(oldname.c_str()), SyscallSucceeds());
+ EXPECT_THAT(rmdir(oldname_parent.c_str()), SyscallSucceeds());
+}
+
+TEST(LinkTest, BogusFlags) {
+ ASSERT_THAT(linkat(1, "foo", 2, "bar", 3), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(LinkTest, WithNewDirFD) {
+ auto oldfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ const std::string newname_parent = NewTempAbsPath();
+ const std::string newname_base = "child";
+ const std::string newname = JoinPath(newname_parent, newname_base);
+
+ // Create newname_parent directory, and get an FD.
+ EXPECT_THAT(mkdir(newname_parent.c_str(), 0777), SyscallSucceeds());
+ const FileDescriptor newname_parent_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(newname_parent, O_DIRECTORY | O_RDONLY));
+
+ // Link newname to oldfile, using newname_parent_fd.
+ EXPECT_THAT(linkat(AT_FDCWD, oldfile.path().c_str(), newname_parent_fd.get(),
+ newname.c_str(), 0),
+ SyscallSucceeds());
+
+ EXPECT_TRUE(IsSameFile(oldfile.path(), newname));
+
+ EXPECT_THAT(unlink(newname.c_str()), SyscallSucceeds());
+ EXPECT_THAT(rmdir(newname_parent.c_str()), SyscallSucceeds());
+}
+
+TEST(LinkTest, RelPathsWithNonDirFDs) {
+ auto oldfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+ // Create a file that will be passed as the directory fd for old/new names.
+ const std::string filename = NewTempAbsPath();
+ const FileDescriptor file_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(filename, O_CREAT | O_RDWR, 0666));
+
+ // Using file_fd as olddirfd will fail.
+ EXPECT_THAT(linkat(file_fd.get(), "foo", AT_FDCWD, "bar", 0),
+ SyscallFailsWithErrno(ENOTDIR));
+
+ // Using file_fd as newdirfd will fail.
+ EXPECT_THAT(linkat(AT_FDCWD, oldfile.path().c_str(), file_fd.get(), "bar", 0),
+ SyscallFailsWithErrno(ENOTDIR));
+}
+
+TEST(LinkTest, AbsPathsWithNonDirFDs) {
+ auto oldfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ const std::string newname = NewTempAbsPath();
+
+ // Create a file that will be passed as the directory fd for old/new names.
+ const std::string filename = NewTempAbsPath();
+ const FileDescriptor file_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(filename, O_CREAT | O_RDWR, 0666));
+
+ // Using file_fd as the dirfds is OK as long as paths are absolute.
+ EXPECT_THAT(linkat(file_fd.get(), oldfile.path().c_str(), file_fd.get(),
+ newname.c_str(), 0),
+ SyscallSucceeds());
+}
+
+TEST(LinkTest, LinkDoesNotFollowSymlinks) {
+ // Create oldfile, and oldsymlink which points to it.
+ auto oldfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ const std::string oldsymlink = NewTempAbsPath();
+ EXPECT_THAT(symlink(oldfile.path().c_str(), oldsymlink.c_str()),
+ SyscallSucceeds());
+
+ // Now hard link newname to oldsymlink.
+ const std::string newname = NewTempAbsPath();
+ EXPECT_THAT(link(oldsymlink.c_str(), newname.c_str()), SyscallSucceeds());
+
+ // The link should not have resolved the symlink, so newname and oldsymlink
+ // are the same.
+ EXPECT_TRUE(IsSameFile(oldsymlink, newname));
+ EXPECT_FALSE(IsSameFile(oldfile.path(), newname));
+
+ EXPECT_THAT(unlink(oldsymlink.c_str()), SyscallSucceeds());
+ EXPECT_THAT(unlink(newname.c_str()), SyscallSucceeds());
+}
+
+TEST(LinkTest, LinkatDoesNotFollowSymlinkByDefault) {
+ // Create oldfile, and oldsymlink which points to it.
+ auto oldfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ const std::string oldsymlink = NewTempAbsPath();
+ EXPECT_THAT(symlink(oldfile.path().c_str(), oldsymlink.c_str()),
+ SyscallSucceeds());
+
+ // Now hard link newname to oldsymlink.
+ const std::string newname = NewTempAbsPath();
+ EXPECT_THAT(
+ linkat(AT_FDCWD, oldsymlink.c_str(), AT_FDCWD, newname.c_str(), 0),
+ SyscallSucceeds());
+
+ // The link should not have resolved the symlink, so newname and oldsymlink
+ // are the same.
+ EXPECT_TRUE(IsSameFile(oldsymlink, newname));
+ EXPECT_FALSE(IsSameFile(oldfile.path(), newname));
+
+ EXPECT_THAT(unlink(oldsymlink.c_str()), SyscallSucceeds());
+ EXPECT_THAT(unlink(newname.c_str()), SyscallSucceeds());
+}
+
+TEST(LinkTest, LinkatWithSymlinkFollow) {
+ // Create oldfile, and oldsymlink which points to it.
+ auto oldfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ const std::string oldsymlink = NewTempAbsPath();
+ ASSERT_THAT(symlink(oldfile.path().c_str(), oldsymlink.c_str()),
+ SyscallSucceeds());
+
+ // Now hard link newname to oldsymlink, and pass AT_SYMLINK_FOLLOW flag.
+ const std::string newname = NewTempAbsPath();
+ ASSERT_THAT(linkat(AT_FDCWD, oldsymlink.c_str(), AT_FDCWD, newname.c_str(),
+ AT_SYMLINK_FOLLOW),
+ SyscallSucceeds());
+
+ // The link should have resolved the symlink, so oldfile and newname are the
+ // same.
+ EXPECT_TRUE(IsSameFile(oldfile.path(), newname));
+ EXPECT_FALSE(IsSameFile(oldsymlink, newname));
+
+ EXPECT_THAT(unlink(oldsymlink.c_str()), SyscallSucceeds());
+ EXPECT_THAT(unlink(newname.c_str()), SyscallSucceeds());
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/lseek.cc b/test/syscalls/linux/lseek.cc
new file mode 100644
index 000000000..fb6a1546e
--- /dev/null
+++ b/test/syscalls/linux/lseek.cc
@@ -0,0 +1,202 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(LseekTest, InvalidWhence) {
+ const std::string kFileData = "hello world\n";
+ const TempPath path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), kFileData, TempPath::kDefaultFileMode));
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(path.path(), O_RDWR, 0644));
+
+ ASSERT_THAT(lseek(fd.get(), 0, -1), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(LseekTest, NegativeOffset) {
+ const std::string kFileData = "hello world\n";
+ const TempPath path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), kFileData, TempPath::kDefaultFileMode));
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(path.path(), O_RDWR, 0644));
+
+ EXPECT_THAT(lseek(fd.get(), -(kFileData.length() + 1), SEEK_CUR),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+// A 32-bit off_t is not large enough to represent an offset larger than
+// maximum file size on standard file systems, so it isn't possible to cause
+// overflow.
+#ifdef __x86_64__
+TEST(LseekTest, Overflow) {
+ // HA! Classic Linux. We really should have an EOVERFLOW
+ // here, since we're seeking to something that cannot be
+ // represented.. but instead we are given an EINVAL.
+ const std::string kFileData = "hello world\n";
+ const TempPath path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), kFileData, TempPath::kDefaultFileMode));
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(path.path(), O_RDWR, 0644));
+ EXPECT_THAT(lseek(fd.get(), 0x7fffffffffffffff, SEEK_END),
+ SyscallFailsWithErrno(EINVAL));
+}
+#endif
+
+TEST(LseekTest, Set) {
+ const std::string kFileData = "hello world\n";
+ const TempPath path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), kFileData, TempPath::kDefaultFileMode));
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(path.path(), O_RDWR, 0644));
+
+ char buf = '\0';
+ EXPECT_THAT(lseek(fd.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0));
+ ASSERT_THAT(read(fd.get(), &buf, 1), SyscallSucceedsWithValue(1));
+ EXPECT_EQ(buf, kFileData.c_str()[0]);
+ EXPECT_THAT(lseek(fd.get(), 6, SEEK_SET), SyscallSucceedsWithValue(6));
+ ASSERT_THAT(read(fd.get(), &buf, 1), SyscallSucceedsWithValue(1));
+ EXPECT_EQ(buf, kFileData.c_str()[6]);
+}
+
+TEST(LseekTest, Cur) {
+ const std::string kFileData = "hello world\n";
+ const TempPath path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), kFileData, TempPath::kDefaultFileMode));
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(path.path(), O_RDWR, 0644));
+
+ char buf = '\0';
+ EXPECT_THAT(lseek(fd.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0));
+ ASSERT_THAT(read(fd.get(), &buf, 1), SyscallSucceedsWithValue(1));
+ EXPECT_EQ(buf, kFileData.c_str()[0]);
+ EXPECT_THAT(lseek(fd.get(), 3, SEEK_CUR), SyscallSucceedsWithValue(4));
+ ASSERT_THAT(read(fd.get(), &buf, 1), SyscallSucceedsWithValue(1));
+ EXPECT_EQ(buf, kFileData.c_str()[4]);
+}
+
+TEST(LseekTest, End) {
+ const std::string kFileData = "hello world\n";
+ const TempPath path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), kFileData, TempPath::kDefaultFileMode));
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(path.path(), O_RDWR, 0644));
+
+ char buf = '\0';
+ EXPECT_THAT(lseek(fd.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0));
+ ASSERT_THAT(read(fd.get(), &buf, 1), SyscallSucceedsWithValue(1));
+ EXPECT_EQ(buf, kFileData.c_str()[0]);
+ EXPECT_THAT(lseek(fd.get(), -2, SEEK_END), SyscallSucceedsWithValue(10));
+ ASSERT_THAT(read(fd.get(), &buf, 1), SyscallSucceedsWithValue(1));
+ EXPECT_EQ(buf, kFileData.c_str()[kFileData.length() - 2]);
+}
+
+TEST(LseekTest, InvalidFD) {
+ EXPECT_THAT(lseek(-1, 0, SEEK_SET), SyscallFailsWithErrno(EBADF));
+}
+
+TEST(LseekTest, DirCurEnd) {
+ const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open("/tmp", O_RDONLY));
+ ASSERT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0));
+}
+
+TEST(LseekTest, ProcDir) {
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self", O_RDONLY));
+ ASSERT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceeds());
+ ASSERT_THAT(lseek(fd.get(), 0, SEEK_END), SyscallSucceeds());
+}
+
+TEST(LseekTest, ProcFile) {
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/meminfo", O_RDONLY));
+ ASSERT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceeds());
+ ASSERT_THAT(lseek(fd.get(), 0, SEEK_END), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(LseekTest, SysDir) {
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/sys/devices", O_RDONLY));
+ ASSERT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceeds());
+ ASSERT_THAT(lseek(fd.get(), 0, SEEK_END), SyscallSucceeds());
+}
+
+TEST(LseekTest, SeekCurrentDir) {
+ // From include/linux/fs.h.
+ constexpr loff_t MAX_LFS_FILESIZE = 0x7fffffffffffffff;
+
+ char* dir = get_current_dir_name();
+ const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(dir, O_RDONLY));
+
+ ASSERT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceeds());
+ ASSERT_THAT(lseek(fd.get(), 0, SEEK_END),
+ // Some filesystems (like ext4) allow lseek(SEEK_END) on a
+ // directory and return MAX_LFS_FILESIZE, others return EINVAL.
+ AnyOf(SyscallSucceedsWithValue(MAX_LFS_FILESIZE),
+ SyscallFailsWithErrno(EINVAL)));
+ free(dir);
+}
+
+TEST(LseekTest, ProcStatTwice) {
+ const FileDescriptor fd1 =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/stat", O_RDONLY));
+ const FileDescriptor fd2 =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/stat", O_RDONLY));
+
+ ASSERT_THAT(lseek(fd1.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0));
+ ASSERT_THAT(lseek(fd1.get(), 0, SEEK_END), SyscallFailsWithErrno(EINVAL));
+ ASSERT_THAT(lseek(fd1.get(), 1000, SEEK_CUR), SyscallSucceeds());
+ // Check that just because we moved fd1, fd2 doesn't move.
+ ASSERT_THAT(lseek(fd2.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0));
+
+ const FileDescriptor fd3 =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/stat", O_RDONLY));
+ ASSERT_THAT(lseek(fd3.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0));
+}
+
+TEST(LseekTest, EtcPasswdDup) {
+ const FileDescriptor fd1 =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/etc/passwd", O_RDONLY));
+ const FileDescriptor fd2 = ASSERT_NO_ERRNO_AND_VALUE(fd1.Dup());
+
+ ASSERT_THAT(lseek(fd1.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0));
+ ASSERT_THAT(lseek(fd2.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0));
+ ASSERT_THAT(lseek(fd1.get(), 1000, SEEK_CUR), SyscallSucceeds());
+ // Check that just because we moved fd1, fd2 doesn't move.
+ ASSERT_THAT(lseek(fd2.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(1000));
+
+ const FileDescriptor fd3 = ASSERT_NO_ERRNO_AND_VALUE(fd1.Dup());
+ ASSERT_THAT(lseek(fd3.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(1000));
+}
+
+// TODO: Add tests where we have donated in sockets.
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/madvise.cc b/test/syscalls/linux/madvise.cc
new file mode 100644
index 000000000..a79c8c75d
--- /dev/null
+++ b/test/syscalls/linux/madvise.cc
@@ -0,0 +1,142 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include <string>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/logging.h"
+#include "test/util/memory_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+void ExpectAllMappingBytes(Mapping const& m, char c) {
+ auto const v = m.view();
+ for (size_t i = 0; i < kPageSize; i++) {
+ ASSERT_EQ(v[i], c) << "at offset " << i;
+ }
+}
+
+// Equivalent to ExpectAllMappingBytes but async-signal-safe and with less
+// helpful failure messages.
+void CheckAllMappingBytes(Mapping const& m, char c) {
+ auto const v = m.view();
+ for (size_t i = 0; i < kPageSize; i++) {
+ TEST_CHECK_MSG(v[i] == c, "mapping contains wrong value");
+ }
+}
+
+TEST(MadviseDontneedTest, ZerosPrivateAnonPage) {
+ auto m = ASSERT_NO_ERRNO_AND_VALUE(
+ MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
+ ExpectAllMappingBytes(m, 0);
+ memset(m.ptr(), 1, m.len());
+ ExpectAllMappingBytes(m, 1);
+ ASSERT_THAT(madvise(m.ptr(), m.len(), MADV_DONTNEED), SyscallSucceeds());
+ ExpectAllMappingBytes(m, 0);
+}
+
+TEST(MadviseDontneedTest, ZerosCOWAnonPageInCallerOnly) {
+ auto m = ASSERT_NO_ERRNO_AND_VALUE(
+ MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
+ ExpectAllMappingBytes(m, 0);
+ memset(m.ptr(), 2, m.len());
+ ExpectAllMappingBytes(m, 2);
+
+ // Do madvise in a child process.
+ pid_t pid = fork();
+ CheckAllMappingBytes(m, 2);
+ if (pid == 0) {
+ TEST_PCHECK(madvise(m.ptr(), m.len(), MADV_DONTNEED) == 0);
+ CheckAllMappingBytes(m, 0);
+ _exit(0);
+ }
+
+ ASSERT_THAT(pid, SyscallSucceeds());
+
+ int status = 0;
+ ASSERT_THAT(waitpid(-1, &status, 0), SyscallSucceedsWithValue(pid));
+ EXPECT_TRUE(WIFEXITED(status));
+ EXPECT_EQ(WEXITSTATUS(status), 0);
+ // The child's madvise should not have affected the parent.
+ ExpectAllMappingBytes(m, 2);
+}
+
+TEST(MadviseDontneedTest, DoesNotModifySharedAnonPage) {
+ auto m = ASSERT_NO_ERRNO_AND_VALUE(
+ MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED));
+ ExpectAllMappingBytes(m, 0);
+ memset(m.ptr(), 3, m.len());
+ ExpectAllMappingBytes(m, 3);
+ ASSERT_THAT(madvise(m.ptr(), m.len(), MADV_DONTNEED), SyscallSucceeds());
+ ExpectAllMappingBytes(m, 3);
+}
+
+TEST(MadviseDontneedTest, CleansPrivateFilePage) {
+ TempPath f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ /* parent = */ GetAbsoluteTestTmpdir(),
+ /* content = */ std::string(kPageSize, 4), TempPath::kDefaultFileMode));
+ FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDWR));
+
+ Mapping m = ASSERT_NO_ERRNO_AND_VALUE(Mmap(
+ nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd.get(), 0));
+ ExpectAllMappingBytes(m, 4);
+ memset(m.ptr(), 5, m.len());
+ ExpectAllMappingBytes(m, 5);
+ ASSERT_THAT(madvise(m.ptr(), m.len(), MADV_DONTNEED), SyscallSucceeds());
+ ExpectAllMappingBytes(m, 4);
+}
+
+TEST(MadviseDontneedTest, DoesNotModifySharedFilePage) {
+ TempPath f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ /* parent = */ GetAbsoluteTestTmpdir(),
+ /* content = */ std::string(kPageSize, 6), TempPath::kDefaultFileMode));
+ FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDWR));
+
+ Mapping m = ASSERT_NO_ERRNO_AND_VALUE(Mmap(
+ nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd.get(), 0));
+ ExpectAllMappingBytes(m, 6);
+ memset(m.ptr(), 7, m.len());
+ ExpectAllMappingBytes(m, 7);
+ ASSERT_THAT(madvise(m.ptr(), m.len(), MADV_DONTNEED), SyscallSucceeds());
+ ExpectAllMappingBytes(m, 7);
+}
+
+TEST(MadviseDontneedTest, IgnoresPermissions) {
+ auto m =
+ ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, MAP_PRIVATE));
+ EXPECT_THAT(madvise(m.ptr(), m.len(), MADV_DONTNEED), SyscallSucceeds());
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/memory_accounting.cc b/test/syscalls/linux/memory_accounting.cc
new file mode 100644
index 000000000..b4b680c34
--- /dev/null
+++ b/test/syscalls/linux/memory_accounting.cc
@@ -0,0 +1,99 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/mman.h>
+#include <map>
+
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "absl/strings/match.h"
+#include "absl/strings/numbers.h"
+#include "absl/strings/str_format.h"
+#include "absl/strings/str_split.h"
+#include "test/util/fs_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+using ::absl::StrFormat;
+
+// AnonUsageFromMeminfo scrapes the current anonymous memory usage from
+// /proc/meminfo and returns it in bytes.
+PosixErrorOr<uint64_t> AnonUsageFromMeminfo() {
+ ASSIGN_OR_RETURN_ERRNO(auto meminfo, GetContents("/proc/meminfo"));
+ std::vector<std::string> lines(absl::StrSplit(meminfo, '\n'));
+
+ // Try to find AnonPages line, the format is AnonPages:\\s+(\\d+) kB\n.
+ for (const auto& line : lines) {
+ if (!absl::StartsWith(line, "AnonPages:")) {
+ continue;
+ }
+
+ std::vector<std::string> parts(
+ absl::StrSplit(line, ' ', absl::SkipEmpty()));
+ if (parts.size() == 3) {
+ // The size is the second field, let's try to parse it as a number.
+ ASSIGN_OR_RETURN_ERRNO(auto anon_kb, Atoi<uint64_t>(parts[1]));
+ return anon_kb * 1024;
+ }
+
+ return PosixError(EINVAL, "AnonPages field in /proc/meminfo was malformed");
+ }
+
+ return PosixError(EINVAL, "AnonPages field not found in /proc/meminfo");
+}
+
+TEST(MemoryAccounting, AnonAccountingPreservedOnSaveRestore) {
+ // This test isn't meaningful on Linux. /proc/meminfo reports system-wide
+ // memory usage, which can change arbitrarily in Linux from other activity on
+ // the machine. In gvisor, this test is the only thing running on the
+ // "machine", so values in /proc/meminfo accurately reflect the memory used by
+ // the test.
+ SKIP_IF(!IsRunningOnGvisor());
+
+ uint64_t anon_initial = ASSERT_NO_ERRNO_AND_VALUE(AnonUsageFromMeminfo());
+
+ // Cause some anonymous memory usage.
+ uint64_t map_bytes = Megabytes(512);
+ char* mem =
+ static_cast<char*>(mmap(nullptr, map_bytes, PROT_READ | PROT_WRITE,
+ MAP_POPULATE | MAP_ANON | MAP_PRIVATE, -1, 0));
+ ASSERT_NE(mem, MAP_FAILED)
+ << "Map failed, errno: " << errno << " (" << strerror(errno) << ").";
+
+ // Write something to each page to prevent them from being decommited on
+ // S/R. Zero pages are dropped on save.
+ for (uint64_t i = 0; i < map_bytes; i += kPageSize) {
+ mem[i] = 'a';
+ }
+
+ uint64_t anon_after_alloc = ASSERT_NO_ERRNO_AND_VALUE(AnonUsageFromMeminfo());
+ EXPECT_THAT(anon_after_alloc,
+ EquivalentWithin(anon_initial + map_bytes, 0.03));
+
+ // We have many implicit S/R cycles from scraping /proc/meminfo throughout the
+ // test, but throw an explicit S/R in here as well.
+ MaybeSave();
+
+ // Usage should remain the same across S/R.
+ uint64_t anon_after_sr = ASSERT_NO_ERRNO_AND_VALUE(AnonUsageFromMeminfo());
+ EXPECT_THAT(anon_after_sr, EquivalentWithin(anon_after_alloc, 0.03));
+}
+
+} // namespace
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/mempolicy.cc b/test/syscalls/linux/mempolicy.cc
new file mode 100644
index 000000000..9f8033bdf
--- /dev/null
+++ b/test/syscalls/linux/mempolicy.cc
@@ -0,0 +1,258 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <sys/syscall.h>
+
+#include "gtest/gtest.h"
+#include "absl/memory/memory.h"
+#include "test/util/cleanup.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+#define BITS_PER_BYTE 8
+
+#define MPOL_F_STATIC_NODES (1 << 15)
+#define MPOL_F_RELATIVE_NODES (1 << 14)
+#define MPOL_DEFAULT 0
+#define MPOL_PREFERRED 1
+#define MPOL_BIND 2
+#define MPOL_INTERLEAVE 3
+#define MPOL_MAX MPOL_INTERLEAVE
+#define MPOL_F_NODE (1 << 0)
+#define MPOL_F_ADDR (1 << 1)
+#define MPOL_F_MEMS_ALLOWED (1 << 2)
+#define MPOL_MF_STRICT (1 << 0)
+#define MPOL_MF_MOVE (1 << 1)
+#define MPOL_MF_MOVE_ALL (1 << 2)
+
+int get_mempolicy(int *policy, uint64_t *nmask, uint64_t maxnode, void *addr,
+ int flags) {
+ return syscall(__NR_get_mempolicy, policy, nmask, maxnode, addr, flags);
+}
+
+int set_mempolicy(int mode, uint64_t *nmask, uint64_t maxnode) {
+ return syscall(__NR_set_mempolicy, mode, nmask, maxnode);
+}
+
+// Creates a cleanup object that resets the calling thread's mempolicy to the
+// system default when the calling scope ends.
+Cleanup ScopedMempolicy() {
+ return Cleanup([] {
+ EXPECT_THAT(set_mempolicy(MPOL_DEFAULT, nullptr, 0), SyscallSucceeds());
+ });
+}
+
+// Temporarily change the memory policy for the calling thread within the
+// caller's scope.
+PosixErrorOr<Cleanup> ScopedSetMempolicy(int mode, uint64_t *nmask,
+ uint64_t maxnode) {
+ if (set_mempolicy(mode, nmask, maxnode)) {
+ return PosixError(errno, "set_mempolicy");
+ }
+ return ScopedMempolicy();
+}
+
+TEST(MempolicyTest, CheckDefaultPolicy) {
+ int mode = 0;
+ uint64_t nodemask = 0;
+ ASSERT_THAT(get_mempolicy(&mode, &nodemask, sizeof(nodemask) * BITS_PER_BYTE,
+ nullptr, 0),
+ SyscallSucceeds());
+
+ EXPECT_EQ(MPOL_DEFAULT, mode);
+ EXPECT_EQ(0x0, nodemask);
+}
+
+TEST(MempolicyTest, PolicyPreservedAfterSetMempolicy) {
+ uint64_t nodemask = 0x1;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSetMempolicy(
+ MPOL_BIND, &nodemask, sizeof(nodemask) * BITS_PER_BYTE));
+
+ int mode = 0;
+ uint64_t nodemask_after = 0x0;
+ ASSERT_THAT(get_mempolicy(&mode, &nodemask_after,
+ sizeof(nodemask_after) * BITS_PER_BYTE, nullptr, 0),
+ SyscallSucceeds());
+ EXPECT_EQ(MPOL_BIND, mode);
+ EXPECT_EQ(0x1, nodemask_after);
+
+ // Try throw in some mode flags.
+ for (auto mode_flag : {MPOL_F_STATIC_NODES, MPOL_F_RELATIVE_NODES}) {
+ auto cleanup2 = ASSERT_NO_ERRNO_AND_VALUE(
+ ScopedSetMempolicy(MPOL_INTERLEAVE | mode_flag, &nodemask,
+ sizeof(nodemask) * BITS_PER_BYTE));
+ mode = 0;
+ nodemask_after = 0x0;
+ ASSERT_THAT(
+ get_mempolicy(&mode, &nodemask_after,
+ sizeof(nodemask_after) * BITS_PER_BYTE, nullptr, 0),
+ SyscallSucceeds());
+ EXPECT_EQ(MPOL_INTERLEAVE | mode_flag, mode);
+ EXPECT_EQ(0x1, nodemask_after);
+ }
+}
+
+TEST(MempolicyTest, SetMempolicyRejectsInvalidInputs) {
+ auto cleanup = ScopedMempolicy();
+ uint64_t nodemask;
+
+ if (IsRunningOnGvisor()) {
+ // Invalid nodemask, we only support a single node on gvisor.
+ nodemask = 0x4;
+ ASSERT_THAT(set_mempolicy(MPOL_DEFAULT, &nodemask,
+ sizeof(nodemask) * BITS_PER_BYTE),
+ SyscallFailsWithErrno(EINVAL));
+ }
+
+ nodemask = 0x1;
+
+ // Invalid mode.
+ ASSERT_THAT(set_mempolicy(7439, &nodemask, sizeof(nodemask) * BITS_PER_BYTE),
+ SyscallFailsWithErrno(EINVAL));
+
+ // Invalid nodemask size.
+ ASSERT_THAT(set_mempolicy(MPOL_DEFAULT, &nodemask, 0),
+ SyscallFailsWithErrno(EINVAL));
+
+ // Invalid mode flag.
+ ASSERT_THAT(
+ set_mempolicy(MPOL_DEFAULT | MPOL_F_STATIC_NODES | MPOL_F_RELATIVE_NODES,
+ &nodemask, sizeof(nodemask) * BITS_PER_BYTE),
+ SyscallFailsWithErrno(EINVAL));
+
+ // MPOL_INTERLEAVE with empty nodemask.
+ nodemask = 0x0;
+ ASSERT_THAT(set_mempolicy(MPOL_INTERLEAVE, &nodemask,
+ sizeof(nodemask) * BITS_PER_BYTE),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+// The manpages specify that the nodemask provided to set_mempolicy are
+// considered empty if the nodemask pointer is null, or if the nodemask size is
+// 0. We use a policy which accepts both empty and non-empty nodemasks
+// (MPOL_PREFERRED), a policy which requires a non-empty nodemask (MPOL_BIND),
+// and a policy which completely ignores the nodemask (MPOL_DEFAULT) to verify
+// argument checking around nodemasks.
+TEST(MempolicyTest, EmptyNodemaskOnSet) {
+ auto cleanup = ScopedMempolicy();
+
+ EXPECT_THAT(set_mempolicy(MPOL_DEFAULT, nullptr, 1), SyscallSucceeds());
+ EXPECT_THAT(set_mempolicy(MPOL_BIND, nullptr, 1),
+ SyscallFailsWithErrno(EINVAL));
+ EXPECT_THAT(set_mempolicy(MPOL_PREFERRED, nullptr, 1), SyscallSucceeds());
+
+ uint64_t nodemask = 0x1;
+ EXPECT_THAT(set_mempolicy(MPOL_DEFAULT, &nodemask, 0),
+ SyscallFailsWithErrno(EINVAL));
+ EXPECT_THAT(set_mempolicy(MPOL_BIND, &nodemask, 0),
+ SyscallFailsWithErrno(EINVAL));
+ EXPECT_THAT(set_mempolicy(MPOL_PREFERRED, &nodemask, 0),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(MempolicyTest, QueryAvailableNodes) {
+ uint64_t nodemask = 0;
+ ASSERT_THAT(
+ get_mempolicy(nullptr, &nodemask, sizeof(nodemask) * BITS_PER_BYTE,
+ nullptr, MPOL_F_MEMS_ALLOWED),
+ SyscallSucceeds());
+ // We can only be sure there is a single node if running on gvisor.
+ if (IsRunningOnGvisor()) {
+ EXPECT_EQ(0x1, nodemask);
+ }
+
+ // MPOL_F_ADDR and MPOL_F_NODE flags may not be combined with
+ // MPOL_F_MEMS_ALLLOWED.
+ for (auto flags :
+ {MPOL_F_MEMS_ALLOWED | MPOL_F_ADDR, MPOL_F_MEMS_ALLOWED | MPOL_F_NODE,
+ MPOL_F_MEMS_ALLOWED | MPOL_F_ADDR | MPOL_F_NODE}) {
+ ASSERT_THAT(get_mempolicy(nullptr, &nodemask,
+ sizeof(nodemask) * BITS_PER_BYTE, nullptr, flags),
+ SyscallFailsWithErrno(EINVAL));
+ }
+}
+
+TEST(MempolicyTest, GetMempolicyQueryNodeForAddress) {
+ uint64_t dummy_stack_address;
+ auto dummy_heap_address = absl::make_unique<uint64_t>();
+ int mode;
+
+ for (auto ptr : {&dummy_stack_address, dummy_heap_address.get()}) {
+ mode = -1;
+ ASSERT_THAT(
+ get_mempolicy(&mode, nullptr, 0, ptr, MPOL_F_ADDR | MPOL_F_NODE),
+ SyscallSucceeds());
+ // If we're not running on gvisor, the address may be allocated on a
+ // different numa node.
+ if (IsRunningOnGvisor()) {
+ EXPECT_EQ(0, mode);
+ }
+ }
+
+ void* invalid_address = reinterpret_cast<void*>(-1);
+
+ // Invalid address.
+ ASSERT_THAT(get_mempolicy(&mode, nullptr, 0, invalid_address,
+ MPOL_F_ADDR | MPOL_F_NODE),
+ SyscallFailsWithErrno(EFAULT));
+
+ // Invalid mode pointer.
+ ASSERT_THAT(get_mempolicy(reinterpret_cast<int*>(invalid_address), nullptr, 0,
+ &dummy_stack_address, MPOL_F_ADDR | MPOL_F_NODE),
+ SyscallFailsWithErrno(EFAULT));
+}
+
+TEST(MempolicyTest, GetMempolicyCanOmitPointers) {
+ int mode;
+ uint64_t nodemask;
+
+ // Omit nodemask pointer.
+ ASSERT_THAT(get_mempolicy(&mode, nullptr, 0, nullptr, 0), SyscallSucceeds());
+ // Omit mode pointer.
+ ASSERT_THAT(get_mempolicy(nullptr, &nodemask,
+ sizeof(nodemask) * BITS_PER_BYTE, nullptr, 0),
+ SyscallSucceeds());
+ // Omit both pointers.
+ ASSERT_THAT(get_mempolicy(nullptr, nullptr, 0, nullptr, 0),
+ SyscallSucceeds());
+}
+
+TEST(MempolicyTest, GetMempolicyNextInterleaveNode) {
+ int mode;
+ // Policy for thread not yet set to MPOL_INTERLEAVE, can't query for
+ // the next node which will be used for allocation.
+ ASSERT_THAT(get_mempolicy(&mode, nullptr, 0, nullptr, MPOL_F_NODE),
+ SyscallFailsWithErrno(EINVAL));
+
+ // Set default policy for thread to MPOL_INTERLEAVE.
+ uint64_t nodemask = 0x1;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSetMempolicy(
+ MPOL_INTERLEAVE, &nodemask, sizeof(nodemask) * BITS_PER_BYTE));
+
+ mode = -1;
+ ASSERT_THAT(get_mempolicy(&mode, nullptr, 0, nullptr, MPOL_F_NODE),
+ SyscallSucceeds());
+ EXPECT_EQ(0, mode);
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/mincore.cc b/test/syscalls/linux/mincore.cc
new file mode 100644
index 000000000..c572bf5ec
--- /dev/null
+++ b/test/syscalls/linux/mincore.cc
@@ -0,0 +1,96 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "test/util/memory_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+size_t CountSetLSBs(std::vector<unsigned char> const& vec) {
+ return std::count_if(begin(vec), end(vec),
+ [](unsigned char c) { return (c & 1) != 0; });
+}
+
+TEST(MincoreTest, DirtyAnonPagesAreResident) {
+ constexpr size_t kTestPageCount = 10;
+ auto const kTestMappingBytes = kTestPageCount * kPageSize;
+ auto m = ASSERT_NO_ERRNO_AND_VALUE(
+ MmapAnon(kTestMappingBytes, PROT_READ | PROT_WRITE, MAP_PRIVATE));
+ memset(m.ptr(), 0, m.len());
+
+ std::vector<unsigned char> vec(kTestPageCount, 0);
+ ASSERT_THAT(mincore(m.ptr(), kTestMappingBytes, vec.data()),
+ SyscallSucceeds());
+ EXPECT_EQ(kTestPageCount, CountSetLSBs(vec));
+}
+
+TEST(MincoreTest, UnalignedAddressFails) {
+ // Map and touch two pages, then try to mincore the second half of the first
+ // page + the first half of the second page. Both pages are mapped, but
+ // mincore should return EINVAL due to the misaligned start address.
+ constexpr size_t kTestPageCount = 2;
+ auto const kTestMappingBytes = kTestPageCount * kPageSize;
+ auto m = ASSERT_NO_ERRNO_AND_VALUE(
+ MmapAnon(kTestMappingBytes, PROT_READ | PROT_WRITE, MAP_PRIVATE));
+ memset(m.ptr(), 0, m.len());
+
+ std::vector<unsigned char> vec(kTestPageCount, 0);
+ EXPECT_THAT(mincore(reinterpret_cast<void*>(m.addr() + kPageSize / 2),
+ kPageSize, vec.data()),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(MincoreTest, UnalignedLengthSucceedsAndIsRoundedUp) {
+ // Map and touch two pages, then try to mincore the first page + the first
+ // half of the second page. mincore should silently round up the length to
+ // include both pages.
+ constexpr size_t kTestPageCount = 2;
+ auto const kTestMappingBytes = kTestPageCount * kPageSize;
+ auto m = ASSERT_NO_ERRNO_AND_VALUE(
+ MmapAnon(kTestMappingBytes, PROT_READ | PROT_WRITE, MAP_PRIVATE));
+ memset(m.ptr(), 0, m.len());
+
+ std::vector<unsigned char> vec(kTestPageCount, 0);
+ ASSERT_THAT(mincore(m.ptr(), kPageSize + kPageSize / 2, vec.data()),
+ SyscallSucceeds());
+ EXPECT_EQ(kTestPageCount, CountSetLSBs(vec));
+}
+
+TEST(MincoreTest, ZeroLengthSucceedsAndAllowsAnyVecBelowTaskSize) {
+ EXPECT_THAT(mincore(nullptr, 0, nullptr), SyscallSucceeds());
+}
+
+TEST(MincoreTest, InvalidLengthFails) {
+ EXPECT_THAT(mincore(nullptr, -1, nullptr), SyscallFailsWithErrno(ENOMEM));
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/mkdir.cc b/test/syscalls/linux/mkdir.cc
new file mode 100644
index 000000000..84db45eb3
--- /dev/null
+++ b/test/syscalls/linux/mkdir.cc
@@ -0,0 +1,96 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/temp_umask.h"
+#include "test/util/capability_util.h"
+#include "test/util/fs_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+class MkdirTest : public ::testing::Test {
+ protected:
+ // SetUp creates various configurations of files.
+ void SetUp() override { dirname_ = NewTempAbsPath(); }
+
+ // TearDown unlinks created files.
+ void TearDown() override {
+ // FIXME: We don't currently implement rmdir.
+ // We do this unconditionally because there's no harm in trying.
+ rmdir(dirname_.c_str());
+ }
+
+ std::string dirname_;
+};
+
+TEST_F(MkdirTest, DISABLED_CanCreateReadbleDir) {
+ ASSERT_THAT(mkdir(dirname_.c_str(), 0444), SyscallSucceeds());
+ ASSERT_THAT(
+ open(JoinPath(dirname_, "anything").c_str(), O_RDWR | O_CREAT, 0666),
+ SyscallFailsWithErrno(EACCES));
+}
+
+TEST_F(MkdirTest, CanCreateWritableDir) {
+ ASSERT_THAT(mkdir(dirname_.c_str(), 0777), SyscallSucceeds());
+ std::string filename = JoinPath(dirname_, "anything");
+ int fd;
+ ASSERT_THAT(fd = open(filename.c_str(), O_RDWR | O_CREAT, 0666),
+ SyscallSucceeds());
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+ ASSERT_THAT(unlink(filename.c_str()), SyscallSucceeds());
+}
+
+TEST_F(MkdirTest, HonorsUmask) {
+ constexpr mode_t kMask = 0111;
+ TempUmask mask(kMask);
+ ASSERT_THAT(mkdir(dirname_.c_str(), 0777), SyscallSucceeds());
+ struct stat statbuf;
+ ASSERT_THAT(stat(dirname_.c_str(), &statbuf), SyscallSucceeds());
+ EXPECT_EQ(0777 & ~kMask, statbuf.st_mode & 0777);
+}
+
+TEST_F(MkdirTest, HonorsUmask2) {
+ constexpr mode_t kMask = 0142;
+ TempUmask mask(kMask);
+ ASSERT_THAT(mkdir(dirname_.c_str(), 0777), SyscallSucceeds());
+ struct stat statbuf;
+ ASSERT_THAT(stat(dirname_.c_str(), &statbuf), SyscallSucceeds());
+ EXPECT_EQ(0777 & ~kMask, statbuf.st_mode & 0777);
+}
+
+TEST_F(MkdirTest, FailsOnDirWithoutWritePerms) {
+ // Drop capabilities that allow us to override file and directory permissions.
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+ auto parent = ASSERT_NO_ERRNO_AND_VALUE(
+ TempPath::CreateDirWith(GetAbsoluteTestTmpdir(), 0555));
+ auto dir = JoinPath(parent.path(), "foo");
+ ASSERT_THAT(mkdir(dir.c_str(), 0777), SyscallFailsWithErrno(EACCES));
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/mknod.cc b/test/syscalls/linux/mknod.cc
new file mode 100644
index 000000000..361ca299b
--- /dev/null
+++ b/test/syscalls/linux/mknod.cc
@@ -0,0 +1,173 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/un.h>
+#include <unistd.h>
+
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(MknodTest, RegularFile) {
+ std::string const node0 = NewTempAbsPathInDir("/tmp");
+ std::string const node1 = NewTempAbsPathInDir("/tmp");
+ ASSERT_THAT(mknod(node0.c_str(), S_IFREG, 0), SyscallSucceeds());
+ ASSERT_THAT(mknod(node1.c_str(), 0, 0), SyscallSucceeds());
+}
+
+TEST(MknodTest, MknodAtRegularFile) {
+ std::string const fifo_relpath = NewTempRelPath();
+ std::string const fifo = JoinPath("/tmp", fifo_relpath);
+ int dirfd;
+ ASSERT_THAT(dirfd = open("/tmp", O_RDONLY), SyscallSucceeds());
+ ASSERT_THAT(mknodat(dirfd, fifo_relpath.c_str(), S_IFIFO | S_IRUSR, 0),
+ SyscallSucceeds());
+ EXPECT_THAT(close(dirfd), SyscallSucceeds());
+
+ struct stat st;
+ ASSERT_THAT(stat(fifo.c_str(), &st), SyscallSucceeds());
+ EXPECT_TRUE(S_ISFIFO(st.st_mode));
+}
+
+TEST(MknodTest, MknodOnExistingPathFails) {
+ std::string const file = NewTempAbsPathInDir("/tmp");
+ std::string const slink = NewTempAbsPathInDir("/tmp");
+ int fd;
+ ASSERT_THAT(fd = open(file.c_str(), O_CREAT | O_RDWR, S_IRUSR | S_IWUSR),
+ SyscallSucceeds());
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+ ASSERT_THAT(symlink(file.c_str(), slink.c_str()), SyscallSucceeds());
+
+ EXPECT_THAT(mknod(file.c_str(), S_IFREG, 0), SyscallFailsWithErrno(EEXIST));
+ EXPECT_THAT(mknod(file.c_str(), S_IFIFO, 0), SyscallFailsWithErrno(EEXIST));
+ EXPECT_THAT(mknod(slink.c_str(), S_IFREG, 0), SyscallFailsWithErrno(EEXIST));
+ EXPECT_THAT(mknod(slink.c_str(), S_IFIFO, 0), SyscallFailsWithErrno(EEXIST));
+}
+
+TEST(MknodTest, UnimplementedTypesReturnError) {
+ if (IsRunningOnGvisor()) {
+ ASSERT_THAT(mknod("/tmp/a_socket", S_IFSOCK, 0),
+ SyscallFailsWithErrno(EOPNOTSUPP));
+ }
+ // These will fail on linux as well since we don't have CAP_MKNOD.
+ ASSERT_THAT(mknod("/tmp/a_chardev", S_IFCHR, 0),
+ SyscallFailsWithErrno(EPERM));
+ ASSERT_THAT(mknod("/tmp/a_blkdev", S_IFBLK, 0), SyscallFailsWithErrno(EPERM));
+}
+
+TEST(MknodTest, Fifo) {
+ std::string const fifo = NewTempAbsPathInDir("/tmp");
+ ASSERT_THAT(mknod(fifo.c_str(), S_IFIFO | S_IRUSR | S_IWUSR, 0),
+ SyscallSucceeds());
+
+ struct stat st;
+ ASSERT_THAT(stat(fifo.c_str(), &st), SyscallSucceeds());
+ EXPECT_TRUE(S_ISFIFO(st.st_mode));
+
+ std::string msg = "some string";
+ std::vector<char> buf(512);
+
+ // Read-end of the pipe.
+ ScopedThread t([&fifo, &buf, &msg]() {
+ int fd;
+ ASSERT_THAT(fd = open(fifo.c_str(), O_RDONLY), SyscallSucceeds());
+ EXPECT_THAT(read(fd, buf.data(), buf.size()),
+ SyscallSucceedsWithValue(msg.length()));
+ EXPECT_EQ(msg, std::string(buf.data()));
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+ });
+
+ // Write-end of the pipe.
+ int wfd;
+ ASSERT_THAT(wfd = open(fifo.c_str(), O_WRONLY), SyscallSucceeds());
+ EXPECT_THAT(write(wfd, msg.c_str(), msg.length()),
+ SyscallSucceedsWithValue(msg.length()));
+ EXPECT_THAT(close(wfd), SyscallSucceeds());
+}
+
+TEST(MknodTest, FifoOtrunc) {
+ std::string const fifo = NewTempAbsPathInDir("/tmp");
+ ASSERT_THAT(mknod(fifo.c_str(), S_IFIFO | S_IRUSR | S_IWUSR, 0),
+ SyscallSucceeds());
+
+ struct stat st = {};
+ ASSERT_THAT(stat(fifo.c_str(), &st), SyscallSucceeds());
+ EXPECT_TRUE(S_ISFIFO(st.st_mode));
+
+ std::string msg = "some string";
+ std::vector<char> buf(512);
+ // Read-end of the pipe.
+ ScopedThread t([&fifo, &buf, &msg]() {
+ int fd;
+ ASSERT_THAT(fd = open(fifo.c_str(), O_RDONLY), SyscallSucceeds());
+ EXPECT_THAT(read(fd, buf.data(), buf.size()),
+ SyscallSucceedsWithValue(msg.length()));
+ EXPECT_EQ(msg, std::string(buf.data()));
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+ });
+
+ int wfd;
+ ASSERT_THAT(wfd = open(fifo.c_str(), O_TRUNC | O_WRONLY), SyscallSucceeds());
+ EXPECT_THAT(write(wfd, msg.c_str(), msg.length()),
+ SyscallSucceedsWithValue(msg.length()));
+ EXPECT_THAT(close(wfd), SyscallSucceeds());
+}
+
+TEST(MknodTest, FifoTruncNoOp) {
+ std::string const fifo = NewTempAbsPathInDir("/tmp");
+ ASSERT_THAT(mknod(fifo.c_str(), S_IFIFO | S_IRUSR | S_IWUSR, 0),
+ SyscallSucceeds());
+
+ EXPECT_THAT(truncate(fifo.c_str(), 0), SyscallFailsWithErrno(EINVAL));
+
+ struct stat st = {};
+ ASSERT_THAT(stat(fifo.c_str(), &st), SyscallSucceeds());
+ EXPECT_TRUE(S_ISFIFO(st.st_mode));
+
+ std::string msg = "some string";
+ std::vector<char> buf(512);
+ // Read-end of the pipe.
+ ScopedThread t([&fifo, &buf, &msg]() {
+ int rfd = 0;
+ ASSERT_THAT(rfd = open(fifo.c_str(), O_RDONLY), SyscallSucceeds());
+ EXPECT_THAT(ReadFd(rfd, buf.data(), buf.size()),
+ SyscallSucceedsWithValue(msg.length()));
+ EXPECT_EQ(msg, std::string(buf.data()));
+ EXPECT_THAT(close(rfd), SyscallSucceeds());
+ });
+
+ int wfd = 0;
+ ASSERT_THAT(wfd = open(fifo.c_str(), O_TRUNC | O_WRONLY), SyscallSucceeds());
+ EXPECT_THAT(ftruncate(wfd, 0), SyscallFailsWithErrno(EINVAL));
+ EXPECT_THAT(WriteFd(wfd, msg.c_str(), msg.length()),
+ SyscallSucceedsWithValue(msg.length()));
+ EXPECT_THAT(ftruncate(wfd, 0), SyscallFailsWithErrno(EINVAL));
+ EXPECT_THAT(close(wfd), SyscallSucceeds());
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/mmap.cc b/test/syscalls/linux/mmap.cc
new file mode 100644
index 000000000..afe060d33
--- /dev/null
+++ b/test/syscalls/linux/mmap.cc
@@ -0,0 +1,1714 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/magic.h>
+#include <linux/unistd.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/statfs.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/strings/escaping.h"
+#include "absl/strings/str_split.h"
+#include "test/util/cleanup.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/memory_util.h"
+#include "test/util/multiprocess_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+using ::testing::Gt;
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+PosixErrorOr<int64_t> VirtualMemorySize() {
+ ASSIGN_OR_RETURN_ERRNO(auto contents, GetContents("/proc/self/statm"));
+ std::vector<std::string> parts = absl::StrSplit(contents, ' ');
+ if (parts.empty()) {
+ return PosixError(EINVAL, "Unable to parse /proc/self/statm");
+ }
+ ASSIGN_OR_RETURN_ERRNO(auto pages, Atoi<int64_t>(parts[0]));
+ return pages * getpagesize();
+}
+
+class MMapTest : public ::testing::Test {
+ protected:
+ // Unmap mapping, if one was made.
+ void TearDown() override {
+ if (addr_) {
+ EXPECT_THAT(Unmap(), SyscallSucceeds());
+ }
+ }
+
+ // Remembers mapping, so it can be automatically unmapped.
+ uintptr_t Map(uintptr_t addr, size_t length, int prot, int flags, int fd,
+ off_t offset) {
+ void* ret =
+ mmap(reinterpret_cast<void*>(addr), length, prot, flags, fd, offset);
+
+ if (ret != MAP_FAILED) {
+ addr_ = ret;
+ length_ = length;
+ }
+
+ return reinterpret_cast<uintptr_t>(ret);
+ }
+
+ // Unmap previous mapping
+ int Unmap() {
+ if (!addr_) {
+ return -1;
+ }
+
+ int ret = munmap(addr_, length_);
+
+ addr_ = nullptr;
+ length_ = 0;
+
+ return ret;
+ }
+
+ // Msync the mapping.
+ int Msync() { return msync(addr_, length_, MS_SYNC); }
+
+ // Mlock the mapping.
+ int Mlock() { return mlock(addr_, length_); }
+
+ // Munlock the mapping.
+ int Munlock() { return munlock(addr_, length_); }
+
+ int Protect(uintptr_t addr, size_t length, int prot) {
+ return mprotect(reinterpret_cast<void*>(addr), length, prot);
+ }
+
+ void* addr_ = nullptr;
+ size_t length_ = 0;
+};
+
+// Matches if arg contains the same contents as std::string str.
+MATCHER_P(EqualsMemory, str, "") {
+ if (0 == memcmp(arg, str.c_str(), str.size())) {
+ return true;
+ }
+
+ *result_listener << "Memory did not match. Got:\n"
+ << absl::BytesToHexString(
+ std::string(static_cast<char*>(arg), str.size()))
+ << "Want:\n"
+ << absl::BytesToHexString(str);
+ return false;
+}
+
+// We can't map pipes, but for different reasons.
+TEST_F(MMapTest, MapPipe) {
+ int fds[2];
+ ASSERT_THAT(pipe(fds), SyscallSucceeds());
+ EXPECT_THAT(Map(0, kPageSize, PROT_READ, MAP_PRIVATE, fds[0], 0),
+ SyscallFailsWithErrno(ENODEV));
+ EXPECT_THAT(Map(0, kPageSize, PROT_READ, MAP_PRIVATE, fds[1], 0),
+ SyscallFailsWithErrno(EACCES));
+ ASSERT_THAT(close(fds[0]), SyscallSucceeds());
+ ASSERT_THAT(close(fds[1]), SyscallSucceeds());
+}
+
+// It's very common to mmap /dev/zero because anonymous mappings aren't part
+// of POSIX although they are widely supported. So a zero initialized memory
+// region would actually come from a "file backed" /dev/zero mapping.
+TEST_F(MMapTest, MapDevZeroShared) {
+ // This test will verify that we're able to map a page backed by /dev/zero
+ // as MAP_SHARED.
+ const FileDescriptor dev_zero =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR));
+
+ // Test that we can create a RW SHARED mapping of /dev/zero.
+ ASSERT_THAT(
+ Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, dev_zero.get(), 0),
+ SyscallSucceeds());
+}
+
+TEST_F(MMapTest, MapDevZeroPrivate) {
+ // This test will verify that we're able to map a page backed by /dev/zero
+ // as MAP_PRIVATE.
+ const FileDescriptor dev_zero =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR));
+
+ // Test that we can create a RW SHARED mapping of /dev/zero.
+ ASSERT_THAT(
+ Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, dev_zero.get(), 0),
+ SyscallSucceeds());
+}
+
+TEST_F(MMapTest, MapDevZeroNoPersistence) {
+ // This test will verify that two independent mappings of /dev/zero do not
+ // appear to reference the same "backed file."
+
+ const FileDescriptor dev_zero1 =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR));
+ const FileDescriptor dev_zero2 =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR));
+
+ ASSERT_THAT(
+ Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, dev_zero1.get(), 0),
+ SyscallSucceeds());
+
+ // Create a second mapping via the second /dev/zero fd.
+ void* psec_map = mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
+ dev_zero2.get(), 0);
+ ASSERT_THAT(reinterpret_cast<intptr_t>(psec_map), SyscallSucceeds());
+
+ // Always unmap.
+ auto cleanup_psec_map = Cleanup(
+ [&] { EXPECT_THAT(munmap(psec_map, kPageSize), SyscallSucceeds()); });
+
+ // Verify that we have independently addressed pages.
+ ASSERT_NE(psec_map, addr_);
+
+ std::string buf_zero(kPageSize, 0x00);
+ std::string buf_ones(kPageSize, 0xFF);
+
+ // Verify the first is actually all zeros after mmap.
+ EXPECT_THAT(addr_, EqualsMemory(buf_zero));
+
+ // Let's fill in the first mapping with 0xFF.
+ memcpy(addr_, buf_ones.data(), kPageSize);
+
+ // Verify that the memcpy actually stuck in the page.
+ EXPECT_THAT(addr_, EqualsMemory(buf_ones));
+
+ // Verify that it didn't affect the second page which should be all zeros.
+ EXPECT_THAT(psec_map, EqualsMemory(buf_zero));
+}
+
+TEST_F(MMapTest, MapDevZeroSharedMultiplePages) {
+ // This will test that we're able to map /dev/zero over multiple pages.
+ const FileDescriptor dev_zero =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR));
+
+ // Test that we can create a RW SHARED mapping of /dev/zero.
+ ASSERT_THAT(Map(0, kPageSize * 2, PROT_READ | PROT_WRITE, MAP_PRIVATE,
+ dev_zero.get(), 0),
+ SyscallSucceeds());
+
+ std::string buf_zero(kPageSize * 2, 0x00);
+ std::string buf_ones(kPageSize * 2, 0xFF);
+
+ // Verify the two pages are actually all zeros after mmap.
+ EXPECT_THAT(addr_, EqualsMemory(buf_zero));
+
+ // Fill out the pages with all ones.
+ memcpy(addr_, buf_ones.data(), kPageSize * 2);
+
+ // Verify that the memcpy actually stuck in the pages.
+ EXPECT_THAT(addr_, EqualsMemory(buf_ones));
+}
+
+TEST_F(MMapTest, MapDevZeroSharedFdNoPersistence) {
+ // This test will verify that two independent mappings of /dev/zero do not
+ // appear to reference the same "backed file" even when mapped from the
+ // same initial fd.
+ const FileDescriptor dev_zero =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR));
+
+ ASSERT_THAT(
+ Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, dev_zero.get(), 0),
+ SyscallSucceeds());
+
+ // Create a second mapping via the same fd.
+ void* psec_map = mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
+ dev_zero.get(), 0);
+ ASSERT_THAT(reinterpret_cast<int64_t>(psec_map), SyscallSucceeds());
+
+ // Always unmap.
+ auto cleanup_psec_map = Cleanup(
+ [&] { ASSERT_THAT(munmap(psec_map, kPageSize), SyscallSucceeds()); });
+
+ // Verify that we have independently addressed pages.
+ ASSERT_NE(psec_map, addr_);
+
+ std::string buf_zero(kPageSize, 0x00);
+ std::string buf_ones(kPageSize, 0xFF);
+
+ // Verify the first is actually all zeros after mmap.
+ EXPECT_THAT(addr_, EqualsMemory(buf_zero));
+
+ // Let's fill in the first mapping with 0xFF.
+ memcpy(addr_, buf_ones.data(), kPageSize);
+
+ // Verify that the memcpy actually stuck in the page.
+ EXPECT_THAT(addr_, EqualsMemory(buf_ones));
+
+ // Verify that it didn't affect the second page which should be all zeros.
+ EXPECT_THAT(psec_map, EqualsMemory(buf_zero));
+}
+
+TEST_F(MMapTest, MapDevZeroSegfaultAfterUnmap) {
+ SetupGvisorDeathTest();
+
+ // This test will verify that we're able to map a page backed by /dev/zero
+ // as MAP_SHARED and after it's unmapped any access results in a SIGSEGV.
+ // This test is redundant but given the special nature of /dev/zero mappings
+ // it doesn't hurt.
+ const FileDescriptor dev_zero =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR));
+
+ const auto rest = [&] {
+ // Test that we can create a RW SHARED mapping of /dev/zero.
+ TEST_PCHECK(Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
+ dev_zero.get(),
+ 0) != reinterpret_cast<uintptr_t>(MAP_FAILED));
+
+ // Confirm that accesses after the unmap result in a SIGSEGV.
+ //
+ // N.B. We depend on this process being single-threaded to ensure there
+ // can't be another mmap to map addr before the dereference below.
+ void* addr_saved = addr_; // Unmap resets addr_.
+ TEST_PCHECK(Unmap() == 0);
+ *reinterpret_cast<volatile int*>(addr_saved) = 0xFF;
+ };
+
+ EXPECT_THAT(InForkedProcess(rest),
+ IsPosixErrorOkAndHolds(W_EXITCODE(0, SIGSEGV)));
+}
+
+TEST_F(MMapTest, MapDevZeroUnaligned) {
+ const FileDescriptor dev_zero =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDWR));
+ const size_t size = kPageSize + kPageSize / 2;
+ const std::string buf_zero(size, 0x00);
+
+ ASSERT_THAT(
+ Map(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, dev_zero.get(), 0),
+ SyscallSucceeds());
+ EXPECT_THAT(addr_, EqualsMemory(buf_zero));
+ ASSERT_THAT(Unmap(), SyscallSucceeds());
+
+ ASSERT_THAT(
+ Map(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, dev_zero.get(), 0),
+ SyscallSucceeds());
+ EXPECT_THAT(addr_, EqualsMemory(buf_zero));
+}
+
+// We can't map _some_ character devices.
+TEST_F(MMapTest, MapCharDevice) {
+ const FileDescriptor cdevfd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/random", 0, 0));
+ EXPECT_THAT(Map(0, kPageSize, PROT_READ, MAP_PRIVATE, cdevfd.get(), 0),
+ SyscallFailsWithErrno(ENODEV));
+}
+
+// We can't map directories.
+TEST_F(MMapTest, MapDirectory) {
+ const FileDescriptor dirfd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(GetAbsoluteTestTmpdir(), 0, 0));
+ EXPECT_THAT(Map(0, kPageSize, PROT_READ, MAP_PRIVATE, dirfd.get(), 0),
+ SyscallFailsWithErrno(ENODEV));
+}
+
+// We can map *something*
+TEST_F(MMapTest, MapAnything) {
+ EXPECT_THAT(Map(0, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+ SyscallSucceedsWithValue(Gt(0)));
+}
+
+// Map length < PageSize allowed
+TEST_F(MMapTest, SmallMap) {
+ EXPECT_THAT(Map(0, 128, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+ SyscallSucceeds());
+}
+
+// Hint address doesn't break anything.
+// Note: there is no requirement we actually get the hint address
+TEST_F(MMapTest, HintAddress) {
+ EXPECT_THAT(
+ Map(0x30000000, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+ SyscallSucceeds());
+}
+
+// MAP_FIXED gives us exactly the requested address
+TEST_F(MMapTest, MapFixed) {
+ EXPECT_THAT(Map(0x30000000, kPageSize, PROT_NONE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0),
+ SyscallSucceedsWithValue(0x30000000));
+}
+
+// 64-bit addresses work too
+#ifdef __x86_64__
+TEST_F(MMapTest, MapFixed64) {
+ EXPECT_THAT(Map(0x300000000000, kPageSize, PROT_NONE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0),
+ SyscallSucceedsWithValue(0x300000000000));
+}
+#endif
+
+// MAP_STACK allowed.
+// There isn't a good way to verify it did anything.
+TEST_F(MMapTest, MapStack) {
+ EXPECT_THAT(Map(0, kPageSize, PROT_NONE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0),
+ SyscallSucceeds());
+}
+
+// MAP_LOCKED allowed.
+// There isn't a good way to verify it did anything.
+TEST_F(MMapTest, MapLocked) {
+ EXPECT_THAT(Map(0, kPageSize, PROT_NONE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_LOCKED, -1, 0),
+ SyscallSucceeds());
+}
+
+// MAP_PRIVATE or MAP_SHARED must be passed
+TEST_F(MMapTest, NotPrivateOrShared) {
+ EXPECT_THAT(Map(0, kPageSize, PROT_NONE, MAP_ANONYMOUS, -1, 0),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+// Only one of MAP_PRIVATE or MAP_SHARED may be passed
+TEST_F(MMapTest, PrivateAndShared) {
+ EXPECT_THAT(Map(0, kPageSize, PROT_NONE,
+ MAP_PRIVATE | MAP_SHARED | MAP_ANONYMOUS, -1, 0),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_F(MMapTest, FixedAlignment) {
+ // Addr must be page aligned (MAP_FIXED)
+ EXPECT_THAT(Map(0x30000001, kPageSize, PROT_NONE,
+ MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS, -1, 0),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+// Non-MAP_FIXED address does not need to be page aligned
+TEST_F(MMapTest, NonFixedAlignment) {
+ EXPECT_THAT(
+ Map(0x30000001, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+ SyscallSucceeds());
+}
+
+// Length = 0 results in EINVAL.
+TEST_F(MMapTest, InvalidLength) {
+ EXPECT_THAT(Map(0, 0, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+// Bad fd not allowed.
+TEST_F(MMapTest, BadFd) {
+ EXPECT_THAT(Map(0, kPageSize, PROT_NONE, MAP_PRIVATE, 999, 0),
+ SyscallFailsWithErrno(EBADF));
+}
+
+// Mappings are writable.
+TEST_F(MMapTest, ProtWrite) {
+ uint64_t addr;
+ constexpr uint8_t kFirstWord[] = {42, 42, 42, 42};
+
+ EXPECT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+ SyscallSucceeds());
+
+ // This shouldn't cause a SIGSEGV.
+ memset(reinterpret_cast<void*>(addr), 42, kPageSize);
+
+ // The written data should actually be there.
+ EXPECT_EQ(
+ 0, memcmp(reinterpret_cast<void*>(addr), kFirstWord, sizeof(kFirstWord)));
+}
+
+// "Write-only" mappings are writable *and* readable.
+TEST_F(MMapTest, ProtWriteOnly) {
+ uint64_t addr;
+ constexpr uint8_t kFirstWord[] = {42, 42, 42, 42};
+
+ EXPECT_THAT(
+ addr = Map(0, kPageSize, PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+ SyscallSucceeds());
+
+ // This shouldn't cause a SIGSEGV.
+ memset(reinterpret_cast<void*>(addr), 42, kPageSize);
+
+ // The written data should actually be there.
+ EXPECT_EQ(
+ 0, memcmp(reinterpret_cast<void*>(addr), kFirstWord, sizeof(kFirstWord)));
+}
+
+// "Write-only" mappings are readable.
+//
+// This is distinct from above to ensure the page is accessible even if the
+// initial fault is a write fault.
+TEST_F(MMapTest, ProtWriteOnlyReadable) {
+ uint64_t addr;
+ constexpr uint64_t kFirstWord = 0;
+
+ EXPECT_THAT(
+ addr = Map(0, kPageSize, PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+ SyscallSucceeds());
+
+ EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), &kFirstWord,
+ sizeof(kFirstWord)));
+}
+
+// Mappings are writable after mprotect from PROT_NONE to PROT_READ|PROT_WRITE.
+TEST_F(MMapTest, ProtectProtWrite) {
+ uint64_t addr;
+ constexpr uint8_t kFirstWord[] = {42, 42, 42, 42};
+
+ EXPECT_THAT(
+ addr = Map(0, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+ SyscallSucceeds());
+
+ ASSERT_THAT(Protect(addr, kPageSize, PROT_READ | PROT_WRITE),
+ SyscallSucceeds());
+
+ // This shouldn't cause a SIGSEGV.
+ memset(reinterpret_cast<void*>(addr), 42, kPageSize);
+
+ // The written data should actually be there.
+ EXPECT_EQ(
+ 0, memcmp(reinterpret_cast<void*>(addr), kFirstWord, sizeof(kFirstWord)));
+}
+
+// SIGSEGV raised when reading PROT_NONE memory
+TEST_F(MMapTest, ProtNoneDeath) {
+ SetupGvisorDeathTest();
+
+ uintptr_t addr;
+
+ ASSERT_THAT(
+ addr = Map(0, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+ SyscallSucceeds());
+
+ EXPECT_EXIT(*reinterpret_cast<volatile int*>(addr),
+ ::testing::KilledBySignal(SIGSEGV), "");
+}
+
+// SIGSEGV raised when writing PROT_READ only memory
+TEST_F(MMapTest, ReadOnlyDeath) {
+ SetupGvisorDeathTest();
+
+ uintptr_t addr;
+
+ ASSERT_THAT(
+ addr = Map(0, kPageSize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+ SyscallSucceeds());
+
+ EXPECT_EXIT(*reinterpret_cast<volatile int*>(addr) = 42,
+ ::testing::KilledBySignal(SIGSEGV), "");
+}
+
+// Writable mapping mprotect'd to read-only should not be writable.
+TEST_F(MMapTest, MprotectReadOnlyDeath) {
+ SetupGvisorDeathTest();
+
+ uintptr_t addr;
+
+ ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+ SyscallSucceeds());
+
+ volatile int* val = reinterpret_cast<int*>(addr);
+
+ // Copy to ensure page is mapped in.
+ *val = 42;
+
+ ASSERT_THAT(Protect(addr, kPageSize, PROT_READ), SyscallSucceeds());
+
+ // Now it shouldn't be writable.
+ EXPECT_EXIT(*val = 0, ::testing::KilledBySignal(SIGSEGV), "");
+}
+
+// Verify that calling mprotect an address that's not page aligned fails.
+TEST_F(MMapTest, MprotectNotPageAligned) {
+ uintptr_t addr;
+
+ ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+ SyscallSucceeds());
+ ASSERT_THAT(Protect(addr + 1, kPageSize - 1, PROT_READ),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+// Verify that calling mprotect with an absurdly huge length fails.
+TEST_F(MMapTest, MprotectHugeLength) {
+ uintptr_t addr;
+
+ ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+ SyscallSucceeds());
+ ASSERT_THAT(Protect(addr, static_cast<size_t>(-1), PROT_READ),
+ SyscallFailsWithErrno(ENOMEM));
+}
+
+#if defined(__x86_64__) || defined(__i386__)
+// This code is equivalent in 32 and 64-bit mode
+const uint8_t machine_code[] = {
+ 0xb8, 0x2a, 0x00, 0x00, 0x00, // movl $42, %eax
+ 0xc3, // retq
+};
+
+// PROT_EXEC allows code execution
+TEST_F(MMapTest, ProtExec) {
+ uintptr_t addr;
+ uint32_t (*func)(void);
+
+ EXPECT_THAT(addr = Map(0, kPageSize, PROT_EXEC | PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+ SyscallSucceeds());
+
+ memcpy(reinterpret_cast<void*>(addr), machine_code, sizeof(machine_code));
+
+ func = reinterpret_cast<uint32_t (*)(void)>(addr);
+
+ EXPECT_EQ(42, func());
+}
+
+// No PROT_EXEC disallows code execution
+TEST_F(MMapTest, NoProtExecDeath) {
+ SetupGvisorDeathTest();
+
+ uintptr_t addr;
+ uint32_t (*func)(void);
+
+ EXPECT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+ SyscallSucceeds());
+
+ memcpy(reinterpret_cast<void*>(addr), machine_code, sizeof(machine_code));
+
+ func = reinterpret_cast<uint32_t (*)(void)>(addr);
+
+ EXPECT_EXIT(func(), ::testing::KilledBySignal(SIGSEGV), "");
+}
+#endif
+
+TEST_F(MMapTest, NoExceedLimitData) {
+ void* prevbrk;
+ void* target_brk;
+ struct rlimit setlim;
+
+ prevbrk = sbrk(0);
+ ASSERT_NE(-1, reinterpret_cast<intptr_t>(prevbrk));
+ target_brk = reinterpret_cast<char*>(prevbrk) + 1;
+
+ setlim.rlim_cur = RLIM_INFINITY;
+ setlim.rlim_max = RLIM_INFINITY;
+ ASSERT_THAT(setrlimit(RLIMIT_DATA, &setlim), SyscallSucceeds());
+ EXPECT_THAT(brk(target_brk), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(MMapTest, ExceedLimitData) {
+ // To unit test this more precisely, we'd need access to the mm's start_brk
+ // and end_brk, which we don't have direct access to :/
+ void* prevbrk;
+ void* target_brk;
+ struct rlimit setlim;
+
+ prevbrk = sbrk(0);
+ ASSERT_NE(-1, reinterpret_cast<intptr_t>(prevbrk));
+ target_brk = reinterpret_cast<char*>(prevbrk) + 8192;
+
+ setlim.rlim_cur = 0;
+ setlim.rlim_max = RLIM_INFINITY;
+ // Set RLIMIT_DATA very low so any subsequent brk() calls fail.
+ // Reset RLIMIT_DATA during teardown step.
+ ASSERT_THAT(setrlimit(RLIMIT_DATA, &setlim), SyscallSucceeds());
+ EXPECT_THAT(brk(target_brk), SyscallFailsWithErrno(ENOMEM));
+ // Teardown step...
+ setlim.rlim_cur = RLIM_INFINITY;
+ ASSERT_THAT(setrlimit(RLIMIT_DATA, &setlim), SyscallSucceeds());
+}
+
+TEST_F(MMapTest, ExceedLimitDataPrlimit) {
+ // To unit test this more precisely, we'd need access to the mm's start_brk
+ // and end_brk, which we don't have direct access to :/
+ void* prevbrk;
+ void* target_brk;
+ struct rlimit setlim;
+
+ prevbrk = sbrk(0);
+ ASSERT_NE(-1, reinterpret_cast<intptr_t>(prevbrk));
+ target_brk = reinterpret_cast<char*>(prevbrk) + 8192;
+
+ setlim.rlim_cur = 0;
+ setlim.rlim_max = RLIM_INFINITY;
+ // Set RLIMIT_DATA very low so any subsequent brk() calls fail.
+ // Reset RLIMIT_DATA during teardown step.
+ ASSERT_THAT(prlimit(0, RLIMIT_DATA, &setlim, nullptr), SyscallSucceeds());
+ EXPECT_THAT(brk(target_brk), SyscallFailsWithErrno(ENOMEM));
+ // Teardown step...
+ setlim.rlim_cur = RLIM_INFINITY;
+ ASSERT_THAT(setrlimit(RLIMIT_DATA, &setlim), SyscallSucceeds());
+}
+
+TEST_F(MMapTest, ExceedLimitDataPrlimitPID) {
+ // To unit test this more precisely, we'd need access to the mm's start_brk
+ // and end_brk, which we don't have direct access to :/
+ void* prevbrk;
+ void* target_brk;
+ struct rlimit setlim;
+
+ prevbrk = sbrk(0);
+ ASSERT_NE(-1, reinterpret_cast<intptr_t>(prevbrk));
+ target_brk = reinterpret_cast<char*>(prevbrk) + 8192;
+
+ setlim.rlim_cur = 0;
+ setlim.rlim_max = RLIM_INFINITY;
+ // Set RLIMIT_DATA very low so any subsequent brk() calls fail.
+ // Reset RLIMIT_DATA during teardown step.
+ ASSERT_THAT(prlimit(syscall(__NR_gettid), RLIMIT_DATA, &setlim, nullptr),
+ SyscallSucceeds());
+ EXPECT_THAT(brk(target_brk), SyscallFailsWithErrno(ENOMEM));
+ // Teardown step...
+ setlim.rlim_cur = RLIM_INFINITY;
+ ASSERT_THAT(setrlimit(RLIMIT_DATA, &setlim), SyscallSucceeds());
+}
+
+TEST_F(MMapTest, NoExceedLimitAS) {
+ constexpr uint64_t kAllocBytes = 200 << 20;
+ // Add some headroom to the AS limit in case of e.g. unexpected stack
+ // expansion.
+ constexpr uint64_t kExtraASBytes = kAllocBytes + (20 << 20);
+ static_assert(kAllocBytes < kExtraASBytes,
+ "test depends on allocation not exceeding AS limit");
+
+ auto vss = ASSERT_NO_ERRNO_AND_VALUE(VirtualMemorySize());
+ struct rlimit setlim;
+ setlim.rlim_cur = vss + kExtraASBytes;
+ setlim.rlim_max = RLIM_INFINITY;
+ ASSERT_THAT(setrlimit(RLIMIT_AS, &setlim), SyscallSucceeds());
+ EXPECT_THAT(
+ Map(0, kAllocBytes, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+ SyscallSucceedsWithValue(Gt(0)));
+}
+
+TEST_F(MMapTest, ExceedLimitAS) {
+ constexpr uint64_t kAllocBytes = 200 << 20;
+ // Add some headroom to the AS limit in case of e.g. unexpected stack
+ // expansion.
+ constexpr uint64_t kExtraASBytes = 20 << 20;
+ static_assert(kAllocBytes > kExtraASBytes,
+ "test depends on allocation exceeding AS limit");
+
+ auto vss = ASSERT_NO_ERRNO_AND_VALUE(VirtualMemorySize());
+ struct rlimit setlim;
+ setlim.rlim_cur = vss + kExtraASBytes;
+ setlim.rlim_max = RLIM_INFINITY;
+ ASSERT_THAT(setrlimit(RLIMIT_AS, &setlim), SyscallSucceeds());
+ EXPECT_THAT(
+ Map(0, kAllocBytes, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+ SyscallFailsWithErrno(ENOMEM));
+}
+
+// Tests that setting an anonymous mmap to PROT_NONE doesn't free the memory.
+TEST_F(MMapTest, SettingProtNoneDoesntFreeMemory) {
+ uintptr_t addr;
+ constexpr uint8_t kFirstWord[] = {42, 42, 42, 42};
+
+ EXPECT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+ SyscallSucceedsWithValue(Gt(0)));
+
+ memset(reinterpret_cast<void*>(addr), 42, kPageSize);
+
+ ASSERT_THAT(Protect(addr, kPageSize, PROT_NONE), SyscallSucceeds());
+ ASSERT_THAT(Protect(addr, kPageSize, PROT_READ | PROT_WRITE),
+ SyscallSucceeds());
+
+ // The written data should still be there.
+ EXPECT_EQ(
+ 0, memcmp(reinterpret_cast<void*>(addr), kFirstWord, sizeof(kFirstWord)));
+}
+
+constexpr char kFileContents[] = "Hello World!";
+
+class MMapFileTest : public MMapTest {
+ protected:
+ FileDescriptor fd_;
+ std::string filename_;
+
+ // Open a file for read/write
+ void SetUp() override {
+ MMapTest::SetUp();
+
+ filename_ = NewTempAbsPath();
+ fd_ = ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_CREAT | O_RDWR, 0644));
+
+ // Extend file so it can be written once mapped. Deliberately make the file
+ // only half a page in size, so we can test what happens when we access the
+ // second half.
+ // Use ftruncate(2) once the sentry supports it.
+ char zero = 0;
+ size_t count = 0;
+ do {
+ const DisableSave ds; // saving 2048 times is slow and useless.
+ Write(&zero, 1), SyscallSucceedsWithValue(1);
+ } while (++count < (kPageSize / 2));
+ ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0));
+ }
+
+ // Close and delete file
+ void TearDown() override {
+ MMapTest::TearDown();
+ fd_.reset(); // Make sure the files is closed before we unlink it.
+ ASSERT_THAT(unlink(filename_.c_str()), SyscallSucceeds());
+ }
+
+ ssize_t Read(char* buf, size_t count) {
+ ssize_t len = 0;
+ do {
+ ssize_t ret = read(fd_.get(), buf, count);
+ if (ret < 0) {
+ return ret;
+ } else if (ret == 0) {
+ return len;
+ }
+
+ len += ret;
+ buf += ret;
+ } while (len < static_cast<ssize_t>(count));
+
+ return len;
+ }
+
+ ssize_t Write(const char* buf, size_t count) {
+ ssize_t len = 0;
+ do {
+ ssize_t ret = write(fd_.get(), buf, count);
+ if (ret < 0) {
+ return ret;
+ } else if (ret == 0) {
+ return len;
+ }
+
+ len += ret;
+ buf += ret;
+ } while (len < static_cast<ssize_t>(count));
+
+ return len;
+ }
+};
+
+// MAP_POPULATE allowed.
+// There isn't a good way to verify it actually did anything.
+//
+// FIXME: Parameterize.
+TEST_F(MMapFileTest, MapPopulate) {
+ ASSERT_THAT(
+ Map(0, kPageSize, PROT_READ, MAP_PRIVATE | MAP_POPULATE, fd_.get(), 0),
+ SyscallSucceeds());
+}
+
+// MAP_POPULATE on a short file.
+//
+// FIXME: Parameterize.
+TEST_F(MMapFileTest, MapPopulateShort) {
+ ASSERT_THAT(Map(0, 2 * kPageSize, PROT_READ, MAP_PRIVATE | MAP_POPULATE,
+ fd_.get(), 0),
+ SyscallSucceeds());
+}
+
+// Read contents from mapped file.
+TEST_F(MMapFileTest, Read) {
+ size_t len = strlen(kFileContents);
+ ASSERT_EQ(len, Write(kFileContents, len));
+
+ uintptr_t addr;
+ ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_PRIVATE, fd_.get(), 0),
+ SyscallSucceeds());
+
+ EXPECT_THAT(reinterpret_cast<char*>(addr),
+ EqualsMemory(std::string(kFileContents)));
+}
+
+// Map at an offset.
+TEST_F(MMapFileTest, MapOffset) {
+ ASSERT_THAT(lseek(fd_.get(), kPageSize, SEEK_SET), SyscallSucceeds());
+
+ size_t len = strlen(kFileContents);
+ ASSERT_EQ(len, Write(kFileContents, len));
+
+ uintptr_t addr;
+ ASSERT_THAT(
+ addr = Map(0, kPageSize, PROT_READ, MAP_PRIVATE, fd_.get(), kPageSize),
+ SyscallSucceeds());
+
+ EXPECT_THAT(reinterpret_cast<char*>(addr),
+ EqualsMemory(std::string(kFileContents)));
+}
+
+TEST_F(MMapFileTest, MapOffsetBeyondEnd) {
+ SetupGvisorDeathTest();
+
+ uintptr_t addr;
+ ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE,
+ fd_.get(), 10 * kPageSize),
+ SyscallSucceeds());
+
+ // Touching the memory causes SIGBUS.
+ size_t len = strlen(kFileContents);
+ EXPECT_EXIT(std::copy(kFileContents, kFileContents + len,
+ reinterpret_cast<volatile char*>(addr)),
+ ::testing::KilledBySignal(SIGBUS), "");
+}
+
+// Verify mmap fails when sum of length and offset overflows.
+TEST_F(MMapFileTest, MapLengthPlusOffsetOverflows) {
+ const size_t length = static_cast<size_t>(-kPageSize);
+ const off_t offset = kPageSize;
+ ASSERT_THAT(Map(0, length, PROT_READ, MAP_PRIVATE, fd_.get(), offset),
+ SyscallFailsWithErrno(ENOMEM));
+}
+
+// MAP_PRIVATE PROT_WRITE is allowed on read-only FDs.
+TEST_F(MMapFileTest, WritePrivateOnReadOnlyFd) {
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_RDONLY));
+
+ uintptr_t addr;
+ EXPECT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE,
+ fd.get(), 0),
+ SyscallSucceeds());
+
+ // Touch the page to ensure the kernel didn't lie about writability.
+ size_t len = strlen(kFileContents);
+ std::copy(kFileContents, kFileContents + len,
+ reinterpret_cast<volatile char*>(addr));
+}
+
+// MAP_PRIVATE PROT_READ is not allowed on write-only FDs.
+TEST_F(MMapFileTest, ReadPrivateOnWriteOnlyFd) {
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_WRONLY));
+
+ uintptr_t addr;
+ EXPECT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_PRIVATE, fd.get(), 0),
+ SyscallFailsWithErrno(EACCES));
+}
+
+// MAP_SHARED PROT_WRITE not allowed on read-only FDs.
+TEST_F(MMapFileTest, WriteSharedOnReadOnlyFd) {
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_RDONLY));
+
+ uintptr_t addr;
+ EXPECT_THAT(
+ addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd.get(), 0),
+ SyscallFailsWithErrno(EACCES));
+}
+
+// MAP_SHARED PROT_READ not allowed on write-only FDs.
+//
+// FIXME: Parameterize.
+TEST_F(MMapFileTest, ReadSharedOnWriteOnlyFd) {
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_WRONLY));
+
+ uintptr_t addr;
+ EXPECT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd.get(), 0),
+ SyscallFailsWithErrno(EACCES));
+}
+
+// MAP_SHARED PROT_WRITE not allowed on write-only FDs.
+// The FD must always be readable.
+//
+// FIXME: Parameterize.
+TEST_F(MMapFileTest, WriteSharedOnWriteOnlyFd) {
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_WRONLY));
+
+ uintptr_t addr;
+ EXPECT_THAT(addr = Map(0, kPageSize, PROT_WRITE, MAP_SHARED, fd.get(), 0),
+ SyscallFailsWithErrno(EACCES));
+}
+
+// Overwriting the contents of a file mapped MAP_SHARED PROT_READ
+// should cause the new data to be reflected in the mapping.
+TEST_F(MMapFileTest, ReadSharedConsistentWithOverwrite) {
+ // Start from scratch.
+ EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds());
+
+ // Expand the file to two pages and dirty them.
+ std::string bufA(kPageSize, 'a');
+ ASSERT_THAT(Write(bufA.c_str(), bufA.size()),
+ SyscallSucceedsWithValue(bufA.size()));
+ std::string bufB(kPageSize, 'b');
+ ASSERT_THAT(Write(bufB.c_str(), bufB.size()),
+ SyscallSucceedsWithValue(bufB.size()));
+
+ // Map the page.
+ uintptr_t addr;
+ ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0),
+ SyscallSucceeds());
+
+ // Check that the mapping contains the right file data.
+ EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), bufA.c_str(), kPageSize));
+ EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr + kPageSize), bufB.c_str(),
+ kPageSize));
+
+ // Start at the beginning of the file.
+ ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0));
+
+ // Swap the write pattern.
+ ASSERT_THAT(Write(bufB.c_str(), bufB.size()),
+ SyscallSucceedsWithValue(bufB.size()));
+ ASSERT_THAT(Write(bufA.c_str(), bufA.size()),
+ SyscallSucceedsWithValue(bufA.size()));
+
+ // Check that the mapping got updated.
+ EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), bufB.c_str(), kPageSize));
+ EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr + kPageSize), bufA.c_str(),
+ kPageSize));
+}
+
+// Partially overwriting a file mapped MAP_SHARED PROT_READ should be reflected
+// in the mapping.
+TEST_F(MMapFileTest, ReadSharedConsistentWithPartialOverwrite) {
+ // Start from scratch.
+ EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds());
+
+ // Expand the file to two pages and dirty them.
+ std::string bufA(kPageSize, 'a');
+ ASSERT_THAT(Write(bufA.c_str(), bufA.size()),
+ SyscallSucceedsWithValue(bufA.size()));
+ std::string bufB(kPageSize, 'b');
+ ASSERT_THAT(Write(bufB.c_str(), bufB.size()),
+ SyscallSucceedsWithValue(bufB.size()));
+
+ // Map the page.
+ uintptr_t addr;
+ ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0),
+ SyscallSucceeds());
+
+ // Check that the mapping contains the right file data.
+ EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), bufA.c_str(), kPageSize));
+ EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr + kPageSize), bufB.c_str(),
+ kPageSize));
+
+ // Start at the beginning of the file.
+ ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0));
+
+ // Do a partial overwrite, spanning both pages.
+ std::string bufC(kPageSize + (kPageSize / 2), 'c');
+ ASSERT_THAT(Write(bufC.c_str(), bufC.size()),
+ SyscallSucceedsWithValue(bufC.size()));
+
+ // Check that the mapping got updated.
+ EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), bufC.c_str(),
+ kPageSize + (kPageSize / 2)));
+ EXPECT_EQ(0,
+ memcmp(reinterpret_cast<void*>(addr + kPageSize + (kPageSize / 2)),
+ bufB.c_str(), kPageSize / 2));
+}
+
+// Overwriting a file mapped MAP_SHARED PROT_READ should be reflected in the
+// mapping and the file.
+TEST_F(MMapFileTest, ReadSharedConsistentWithWriteAndFile) {
+ // Start from scratch.
+ EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds());
+
+ // Expand the file to two full pages and dirty it.
+ std::string bufA(2 * kPageSize, 'a');
+ ASSERT_THAT(Write(bufA.c_str(), bufA.size()),
+ SyscallSucceedsWithValue(bufA.size()));
+
+ // Map only the first page.
+ uintptr_t addr;
+ ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0),
+ SyscallSucceeds());
+
+ // Prepare to overwrite the file contents.
+ ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0));
+
+ // Overwrite everything, beyond the mapped portion.
+ std::string bufB(2 * kPageSize, 'b');
+ ASSERT_THAT(Write(bufB.c_str(), bufB.size()),
+ SyscallSucceedsWithValue(bufB.size()));
+
+ // What the mapped portion should now look like.
+ std::string bufMapped(kPageSize, 'b');
+
+ // Expect that the mapped portion is consistent.
+ EXPECT_EQ(
+ 0, memcmp(reinterpret_cast<void*>(addr), bufMapped.c_str(), kPageSize));
+
+ // Prepare to read the entire file contents.
+ ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0));
+
+ // Expect that the file was fully updated.
+ std::vector<char> bufFile(2 * kPageSize);
+ ASSERT_THAT(Read(bufFile.data(), bufFile.size()),
+ SyscallSucceedsWithValue(bufFile.size()));
+ // Cast to void* to avoid EXPECT_THAT assuming bufFile.data() is a
+ // NUL-terminated C std::string. EXPECT_THAT will try to print a char* as a C
+ // std::string, possibly overruning the buffer.
+ EXPECT_THAT(reinterpret_cast<void*>(bufFile.data()), EqualsMemory(bufB));
+}
+
+// Write data to mapped file.
+TEST_F(MMapFileTest, WriteShared) {
+ uintptr_t addr;
+ ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
+ fd_.get(), 0),
+ SyscallSucceeds());
+
+ size_t len = strlen(kFileContents);
+ memcpy(reinterpret_cast<void*>(addr), kFileContents, len);
+
+ // The file may not actually be updated until munmap is called.
+ ASSERT_THAT(Unmap(), SyscallSucceeds());
+
+ std::vector<char> buf(len);
+ ASSERT_THAT(Read(buf.data(), buf.size()),
+ SyscallSucceedsWithValue(buf.size()));
+ // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a
+ // NUL-terminated C std::string. EXPECT_THAT will try to print a char* as a C
+ // std::string, possibly overruning the buffer.
+ EXPECT_THAT(reinterpret_cast<void*>(buf.data()),
+ EqualsMemory(std::string(kFileContents)));
+}
+
+// Write data to portion of mapped page beyond the end of the file.
+// These writes are not reflected in the file.
+TEST_F(MMapFileTest, WriteSharedBeyondEnd) {
+ // The file is only half of a page. We map an entire page. Writes to the
+ // end of the mapping must not be reflected in the file.
+ uintptr_t addr;
+ ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
+ fd_.get(), 0),
+ SyscallSucceeds());
+
+ // First half; this is reflected in the file.
+ std::string first(kPageSize / 2, 'A');
+ memcpy(reinterpret_cast<void*>(addr), first.c_str(), first.size());
+
+ // Second half; this is not reflected in the file.
+ std::string second(kPageSize / 2, 'B');
+ memcpy(reinterpret_cast<void*>(addr + kPageSize / 2), second.c_str(),
+ second.size());
+
+ // The file may not actually be updated until munmap is called.
+ ASSERT_THAT(Unmap(), SyscallSucceeds());
+
+ // Big enough to fit the entire page, if the writes are mistakenly written to
+ // the file.
+ std::vector<char> buf(kPageSize);
+
+ // Only the first half is in the file.
+ ASSERT_THAT(Read(buf.data(), buf.size()),
+ SyscallSucceedsWithValue(first.size()));
+ // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a
+ // NUL-terminated C std::string. EXPECT_THAT will try to print a char* as a C
+ // std::string, possibly overruning the buffer.
+ EXPECT_THAT(reinterpret_cast<void*>(buf.data()), EqualsMemory(first));
+}
+
+// The portion of a mapped page that becomes part of the file after a truncate
+// is reflected in the file.
+TEST_F(MMapFileTest, WriteSharedTruncateUp) {
+ // The file is only half of a page. We map an entire page. Writes to the
+ // end of the mapping must not be reflected in the file.
+ uintptr_t addr;
+ ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
+ fd_.get(), 0),
+ SyscallSucceeds());
+
+ // First half; this is reflected in the file.
+ std::string first(kPageSize / 2, 'A');
+ memcpy(reinterpret_cast<void*>(addr), first.c_str(), first.size());
+
+ // Second half; this is not reflected in the file now (see
+ // WriteSharedBeyondEnd), but will be after the truncate.
+ std::string second(kPageSize / 2, 'B');
+ memcpy(reinterpret_cast<void*>(addr + kPageSize / 2), second.c_str(),
+ second.size());
+
+ // Extend the file to a full page. The second half of the page will be
+ // reflected in the file.
+ EXPECT_THAT(ftruncate(fd_.get(), kPageSize), SyscallSucceeds());
+
+ // The file may not actually be updated until munmap is called.
+ ASSERT_THAT(Unmap(), SyscallSucceeds());
+
+ // The whole page is in the file.
+ std::vector<char> buf(kPageSize);
+ ASSERT_THAT(Read(buf.data(), buf.size()),
+ SyscallSucceedsWithValue(buf.size()));
+ // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a
+ // NUL-terminated C std::string. EXPECT_THAT will try to print a char* as a C
+ // std::string, possibly overruning the buffer.
+ EXPECT_THAT(reinterpret_cast<void*>(buf.data()), EqualsMemory(first));
+ EXPECT_THAT(reinterpret_cast<void*>(buf.data() + kPageSize / 2),
+ EqualsMemory(second));
+}
+
+TEST_F(MMapFileTest, ReadSharedTruncateDownThenUp) {
+ // Start from scratch.
+ EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds());
+
+ // Expand the file to a full page and dirty it.
+ std::string buf(kPageSize, 'a');
+ ASSERT_THAT(Write(buf.c_str(), buf.size()),
+ SyscallSucceedsWithValue(buf.size()));
+
+ // Map the page.
+ uintptr_t addr;
+ ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0),
+ SyscallSucceeds());
+
+ // Check that the memory contains he file data.
+ EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), buf.c_str(), kPageSize));
+
+ // Truncate down, then up.
+ EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds());
+ EXPECT_THAT(ftruncate(fd_.get(), kPageSize), SyscallSucceeds());
+
+ // Check that the memory was zeroed.
+ std::string zeroed(kPageSize, '\0');
+ EXPECT_EQ(0,
+ memcmp(reinterpret_cast<void*>(addr), zeroed.c_str(), kPageSize));
+
+ // The file may not actually be updated until msync is called.
+ ASSERT_THAT(Msync(), SyscallSucceeds());
+
+ // Prepare to read the entire file contents.
+ ASSERT_THAT(lseek(fd_.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0));
+
+ // Expect that the file is fully updated.
+ std::vector<char> bufFile(kPageSize);
+ ASSERT_THAT(Read(bufFile.data(), bufFile.size()),
+ SyscallSucceedsWithValue(bufFile.size()));
+ EXPECT_EQ(0, memcmp(bufFile.data(), zeroed.c_str(), kPageSize));
+}
+
+TEST_F(MMapFileTest, WriteSharedTruncateDownThenUp) {
+ // The file is only half of a page. We map an entire page. Writes to the
+ // end of the mapping must not be reflected in the file.
+ uintptr_t addr;
+ ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
+ fd_.get(), 0),
+ SyscallSucceeds());
+
+ // First half; this will be deleted by truncate(0).
+ std::string first(kPageSize / 2, 'A');
+ memcpy(reinterpret_cast<void*>(addr), first.c_str(), first.size());
+
+ // Truncate down, then up.
+ EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds());
+ EXPECT_THAT(ftruncate(fd_.get(), kPageSize), SyscallSucceeds());
+
+ // The whole page is zeroed in memory.
+ std::string zeroed(kPageSize, '\0');
+ EXPECT_EQ(0,
+ memcmp(reinterpret_cast<void*>(addr), zeroed.c_str(), kPageSize));
+
+ // The file may not actually be updated until munmap is called.
+ ASSERT_THAT(Unmap(), SyscallSucceeds());
+
+ // The whole file is also zeroed.
+ std::vector<char> buf(kPageSize);
+ ASSERT_THAT(Read(buf.data(), buf.size()),
+ SyscallSucceedsWithValue(buf.size()));
+ // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a
+ // NUL-terminated C std::string. EXPECT_THAT will try to print a char* as a C
+ // std::string, possibly overruning the buffer.
+ EXPECT_THAT(reinterpret_cast<void*>(buf.data()), EqualsMemory(zeroed));
+}
+
+TEST_F(MMapFileTest, ReadSharedTruncateSIGBUS) {
+ SetupGvisorDeathTest();
+
+ // Start from scratch.
+ EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds());
+
+ // Expand the file to a full page and dirty it.
+ std::string buf(kPageSize, 'a');
+ ASSERT_THAT(Write(buf.c_str(), buf.size()),
+ SyscallSucceedsWithValue(buf.size()));
+
+ // Map the page.
+ uintptr_t addr;
+ ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0),
+ SyscallSucceeds());
+
+ // Check that the mapping contains the file data.
+ EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), buf.c_str(), kPageSize));
+
+ // Truncate down.
+ EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds());
+
+ // Accessing the truncated region should cause a SIGBUS.
+ std::vector<char> in(kPageSize);
+ EXPECT_EXIT(
+ std::copy(reinterpret_cast<volatile char*>(addr),
+ reinterpret_cast<volatile char*>(addr) + kPageSize, in.data()),
+ ::testing::KilledBySignal(SIGBUS), "");
+}
+
+TEST_F(MMapFileTest, WriteSharedTruncateSIGBUS) {
+ SetupGvisorDeathTest();
+
+ uintptr_t addr;
+ ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
+ fd_.get(), 0),
+ SyscallSucceeds());
+
+ // Touch the memory to be sure it really is mapped.
+ size_t len = strlen(kFileContents);
+ memcpy(reinterpret_cast<void*>(addr), kFileContents, len);
+
+ // Truncate down.
+ EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds());
+
+ // Accessing the truncated file should cause a SIGBUS.
+ EXPECT_EXIT(std::copy(kFileContents, kFileContents + len,
+ reinterpret_cast<volatile char*>(addr)),
+ ::testing::KilledBySignal(SIGBUS), "");
+}
+
+TEST_F(MMapFileTest, ReadSharedTruncatePartialPage) {
+ // Start from scratch.
+ EXPECT_THAT(ftruncate(fd_.get(), 0), SyscallSucceeds());
+
+ // Dirty the file.
+ std::string buf(kPageSize, 'a');
+ ASSERT_THAT(Write(buf.c_str(), buf.size()),
+ SyscallSucceedsWithValue(buf.size()));
+
+ // Map a page.
+ uintptr_t addr;
+ ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0),
+ SyscallSucceeds());
+
+ // Truncate to half of the page.
+ EXPECT_THAT(ftruncate(fd_.get(), kPageSize / 2), SyscallSucceeds());
+
+ // First half of the page untouched.
+ EXPECT_EQ(0,
+ memcmp(reinterpret_cast<void*>(addr), buf.data(), kPageSize / 2));
+
+ // Second half is zeroed.
+ std::string zeroed(kPageSize / 2, '\0');
+ EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr + kPageSize / 2),
+ zeroed.c_str(), kPageSize / 2));
+}
+
+// Page can still be accessed and contents are intact after truncating a partial
+// page.
+TEST_F(MMapFileTest, WriteSharedTruncatePartialPage) {
+ // Expand the file to a full page.
+ EXPECT_THAT(ftruncate(fd_.get(), kPageSize), SyscallSucceeds());
+
+ uintptr_t addr;
+ ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
+ fd_.get(), 0),
+ SyscallSucceeds());
+
+ // Fill the entire page.
+ std::string contents(kPageSize, 'A');
+ memcpy(reinterpret_cast<void*>(addr), contents.c_str(), contents.size());
+
+ // Truncate half of the page.
+ EXPECT_THAT(ftruncate(fd_.get(), kPageSize / 2), SyscallSucceeds());
+
+ // First half of the page untouched.
+ EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr), contents.c_str(),
+ kPageSize / 2));
+
+ // Second half zeroed.
+ std::string zeroed(kPageSize / 2, '\0');
+ EXPECT_EQ(0, memcmp(reinterpret_cast<void*>(addr + kPageSize / 2),
+ zeroed.c_str(), kPageSize / 2));
+}
+
+// MAP_PRIVATE writes are not carried through to the underlying file.
+TEST_F(MMapFileTest, WritePrivate) {
+ uintptr_t addr;
+ ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE,
+ fd_.get(), 0),
+ SyscallSucceeds());
+
+ size_t len = strlen(kFileContents);
+ memcpy(reinterpret_cast<void*>(addr), kFileContents, len);
+
+ // The file should not be updated, but if it mistakenly is, it may not be
+ // until after munmap is called.
+ ASSERT_THAT(Unmap(), SyscallSucceeds());
+
+ std::vector<char> buf(len);
+ ASSERT_THAT(Read(buf.data(), buf.size()),
+ SyscallSucceedsWithValue(buf.size()));
+ // Cast to void* to avoid EXPECT_THAT assuming buf.data() is a
+ // NUL-terminated C std::string. EXPECT_THAT will try to print a char* as a C
+ // std::string, possibly overruning the buffer.
+ EXPECT_THAT(reinterpret_cast<void*>(buf.data()),
+ EqualsMemory(std::string(len, '\0')));
+}
+
+// SIGBUS raised when writing past end of file to a private mapping.
+//
+// FIXME: Parameterize.
+TEST_F(MMapFileTest, SigBusDeathWritePrivate) {
+ SetupGvisorDeathTest();
+
+ uintptr_t addr;
+ ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE,
+ fd_.get(), 0),
+ SyscallSucceeds());
+
+ // MMapFileTest makes a file kPageSize/2 long. The entire first page will be
+ // accessible. Write just beyond that.
+ size_t len = strlen(kFileContents);
+ EXPECT_EXIT(std::copy(kFileContents, kFileContents + len,
+ reinterpret_cast<volatile char*>(addr + kPageSize)),
+ ::testing::KilledBySignal(SIGBUS), "");
+}
+
+// SIGBUS raised when reading past end of file on a shared mapping.
+//
+// FIXME: Parameterize.
+TEST_F(MMapFileTest, SigBusDeathReadShared) {
+ SetupGvisorDeathTest();
+
+ uintptr_t addr;
+ ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0),
+ SyscallSucceeds());
+
+ // MMapFileTest makes a file kPageSize/2 long. The entire first page will be
+ // accessible. Read just beyond that.
+ std::vector<char> in(kPageSize);
+ EXPECT_EXIT(
+ std::copy(reinterpret_cast<volatile char*>(addr + kPageSize),
+ reinterpret_cast<volatile char*>(addr + kPageSize) + kPageSize,
+ in.data()),
+ ::testing::KilledBySignal(SIGBUS), "");
+}
+
+// SIGBUS raised when reading past end of file on a shared mapping.
+//
+// FIXME: Parameterize.
+TEST_F(MMapFileTest, SigBusDeathWriteShared) {
+ SetupGvisorDeathTest();
+
+ uintptr_t addr;
+ ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
+ fd_.get(), 0),
+ SyscallSucceeds());
+
+ // MMapFileTest makes a file kPageSize/2 long. The entire first page will be
+ // accessible. Write just beyond that.
+ size_t len = strlen(kFileContents);
+ EXPECT_EXIT(std::copy(kFileContents, kFileContents + len,
+ reinterpret_cast<volatile char*>(addr + kPageSize)),
+ ::testing::KilledBySignal(SIGBUS), "");
+}
+
+// Tests that SIGBUS is not raised when writing to a file-mapped page before
+// EOF, even if part of the mapping extends beyond EOF.
+TEST_F(MMapFileTest, NoSigBusOnPagesBeforeEOF) {
+ uintptr_t addr;
+ ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE,
+ fd_.get(), 0),
+ SyscallSucceeds());
+
+ // The test passes if this survives.
+ size_t len = strlen(kFileContents);
+ std::copy(kFileContents, kFileContents + len,
+ reinterpret_cast<volatile char*>(addr));
+}
+
+// Tests that SIGBUS is not raised when writing to a file-mapped page containing
+// EOF, *after* the EOF for a private mapping.
+TEST_F(MMapFileTest, NoSigBusOnPageContainingEOFWritePrivate) {
+ uintptr_t addr;
+ ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE,
+ fd_.get(), 0),
+ SyscallSucceeds());
+
+ // The test passes if this survives. (Technically addr+kPageSize/2 is already
+ // beyond EOF, but +1 to check for fencepost errors.)
+ size_t len = strlen(kFileContents);
+ std::copy(kFileContents, kFileContents + len,
+ reinterpret_cast<volatile char*>(addr + (kPageSize / 2) + 1));
+}
+
+// Tests that SIGBUS is not raised when reading from a file-mapped page
+// containing EOF, *after* the EOF for a shared mapping.
+//
+// FIXME: Parameterize.
+TEST_F(MMapFileTest, NoSigBusOnPageContainingEOFReadShared) {
+ uintptr_t addr;
+ ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0),
+ SyscallSucceeds());
+
+ // The test passes if this survives. (Technically addr+kPageSize/2 is already
+ // beyond EOF, but +1 to check for fencepost errors.)
+ auto* start = reinterpret_cast<volatile char*>(addr + (kPageSize / 2) + 1);
+ size_t len = strlen(kFileContents);
+ std::vector<char> in(len);
+ std::copy(start, start + len, in.data());
+}
+
+// Tests that SIGBUS is not raised when writing to a file-mapped page containing
+// EOF, *after* the EOF for a shared mapping.
+//
+// FIXME: Parameterize.
+TEST_F(MMapFileTest, NoSigBusOnPageContainingEOFWriteShared) {
+ uintptr_t addr;
+ ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
+ fd_.get(), 0),
+ SyscallSucceeds());
+
+ // The test passes if this survives. (Technically addr+kPageSize/2 is already
+ // beyond EOF, but +1 to check for fencepost errors.)
+ size_t len = strlen(kFileContents);
+ std::copy(kFileContents, kFileContents + len,
+ reinterpret_cast<volatile char*>(addr + (kPageSize / 2) + 1));
+}
+
+// Tests that reading from writable shared file-mapped pages succeeds.
+//
+// On most platforms this is trivial, but when the file is mapped via the sentry
+// page cache (which does not yet support writing to shared mappings), a bug
+// caused reads to fail unnecessarily on such mappings.
+TEST_F(MMapFileTest, ReadingWritableSharedFilePageSucceeds) {
+ uintptr_t addr;
+ size_t len = strlen(kFileContents);
+
+ ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
+ fd_.get(), 0),
+ SyscallSucceeds());
+
+ std::vector<char> buf(kPageSize);
+ // The test passes if this survives.
+ std::copy(reinterpret_cast<volatile char*>(addr),
+ reinterpret_cast<volatile char*>(addr) + len, buf.data());
+}
+
+// Tests that EFAULT is returned when invoking a syscall that requires the OS to
+// read past end of file (resulting in a fault in sentry context in the gVisor
+// case).
+TEST_F(MMapFileTest, InternalSigBus) {
+ uintptr_t addr;
+ ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE,
+ fd_.get(), 0),
+ SyscallSucceeds());
+
+ // This depends on the fact that gVisor implements pipes internally.
+ int pipefd[2];
+ ASSERT_THAT(pipe(pipefd), SyscallSucceeds());
+ EXPECT_THAT(
+ write(pipefd[1], reinterpret_cast<void*>(addr + kPageSize), kPageSize),
+ SyscallFailsWithErrno(EFAULT));
+
+ EXPECT_THAT(close(pipefd[0]), SyscallSucceeds());
+ EXPECT_THAT(close(pipefd[1]), SyscallSucceeds());
+}
+
+// Like InternalSigBus, but test the WriteZerosAt path by reading from
+// /dev/zero to a shared mapping (so that the SIGBUS isn't caught during
+// copy-on-write breaking).
+TEST_F(MMapFileTest, InternalSigBusZeroing) {
+ uintptr_t addr;
+ ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
+ fd_.get(), 0),
+ SyscallSucceeds());
+
+ const FileDescriptor dev_zero =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDONLY));
+ EXPECT_THAT(read(dev_zero.get(), reinterpret_cast<void*>(addr + kPageSize),
+ kPageSize),
+ SyscallFailsWithErrno(EFAULT));
+}
+
+// Checks that mmaps with a length of uint64_t(-PAGE_SIZE + 1) or greater do not
+// induce a sentry panic (due to "rounding up" to 0).
+TEST_F(MMapTest, HugeLength) {
+ EXPECT_THAT(Map(0, static_cast<uint64_t>(-kPageSize + 1), PROT_NONE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+ SyscallFailsWithErrno(ENOMEM));
+}
+
+// Tests for a specific gVisor MM caching bug.
+TEST_F(MMapTest, AccessCOWInvalidatesCachedSegments) {
+ auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDWR));
+ auto zero_fd = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDONLY));
+
+ // Get a two-page private mapping and fill it with 1s.
+ uintptr_t addr;
+ ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0),
+ SyscallSucceeds());
+ memset(addr_, 1, 2 * kPageSize);
+ MaybeSave();
+
+ // Fork to make the mapping copy-on-write.
+ pid_t const pid = fork();
+ if (pid == 0) {
+ // The child process waits for the parent to SIGKILL it.
+ while (true) {
+ pause();
+ }
+ }
+ ASSERT_THAT(pid, SyscallSucceeds());
+ auto cleanup_child = Cleanup([&] {
+ EXPECT_THAT(kill(pid, SIGKILL), SyscallSucceeds());
+ int status;
+ EXPECT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+ });
+
+ // Induce a read-only Access of the first page of the mapping, which will not
+ // cause a copy. The usermem.Segment should be cached.
+ ASSERT_THAT(PwriteFd(fd.get(), addr_, kPageSize, 0),
+ SyscallSucceedsWithValue(kPageSize));
+
+ // Induce a writable Access of both pages of the mapping. This should
+ // invalidate the cached Segment.
+ ASSERT_THAT(PreadFd(zero_fd.get(), addr_, 2 * kPageSize, 0),
+ SyscallSucceedsWithValue(2 * kPageSize));
+
+ // Induce a read-only Access of the first page of the mapping again. It should
+ // read the 0s that were stored in the mapping by the read from /dev/zero. If
+ // the read failed to invalidate the cached Segment, it will instead read the
+ // 1s in the stale page.
+ ASSERT_THAT(PwriteFd(fd.get(), addr_, kPageSize, 0),
+ SyscallSucceedsWithValue(kPageSize));
+ std::vector<char> buf(kPageSize);
+ ASSERT_THAT(PreadFd(fd.get(), buf.data(), kPageSize, 0),
+ SyscallSucceedsWithValue(kPageSize));
+ for (size_t i = 0; i < kPageSize; i++) {
+ ASSERT_EQ(0, buf[i]) << "at offset " << i;
+ }
+}
+
+TEST_F(MMapTest, NoReserve) {
+ const size_t kSize = 10 * 1 << 20; // 10M
+ uintptr_t addr;
+ ASSERT_THAT(addr = Map(0, kSize, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0),
+ SyscallSucceeds());
+ EXPECT_GT(addr, 0);
+
+ // Check that every page can be read/written. Technically, writing to memory
+ // could SIGSEGV in case there is no more memory available. In gVisor it
+ // would never happen though because NORESERVE is ignored. In Linux, it's
+ // possible to fail, but allocation is small enough that it's highly likely
+ // to succeed.
+ for (size_t j = 0; j < kSize; j += kPageSize) {
+ EXPECT_EQ(0, reinterpret_cast<char*>(addr)[j]);
+ reinterpret_cast<char*>(addr)[j] = j;
+ }
+}
+
+// Map more than the gVisor page-cache map unit (64k) and ensure that
+// it is consistent with reading from the file.
+TEST_F(MMapFileTest, Bug38498194) {
+ // Choose a sufficiently large map unit.
+ constexpr int kSize = 4 * 1024 * 1024;
+ EXPECT_THAT(ftruncate(fd_.get(), kSize), SyscallSucceeds());
+
+ // Map a large enough region so that multiple internal segments
+ // are created to back the mapping.
+ uintptr_t addr;
+ ASSERT_THAT(
+ addr = Map(0, kSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd_.get(), 0),
+ SyscallSucceeds());
+
+ std::vector<char> expect(kSize, 'a');
+ std::copy(expect.data(), expect.data() + expect.size(),
+ reinterpret_cast<volatile char*>(addr));
+
+ // Trigger writeback for gVisor. In Linux pages stay cached until
+ // it can't hold onto them anymore.
+ ASSERT_THAT(Unmap(), SyscallSucceeds());
+
+ std::vector<char> buf(kSize);
+ ASSERT_THAT(Read(buf.data(), buf.size()),
+ SyscallSucceedsWithValue(buf.size()));
+ EXPECT_EQ(buf, expect) << std::string(buf.data(), buf.size());
+}
+
+// Tests that reading from a file to a memory mapping of the same file does not
+// deadlock.
+TEST_F(MMapFileTest, SelfRead) {
+ uintptr_t addr;
+ ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED,
+ fd_.get(), 0),
+ SyscallSucceeds());
+ EXPECT_THAT(Read(reinterpret_cast<char*>(addr), kPageSize / 2),
+ SyscallSucceedsWithValue(kPageSize / 2));
+ // The resulting file contents are poorly-specified and irrelevant.
+}
+
+// Tests that writing to a file from a memory mapping of the same file does not
+// deadlock.
+TEST_F(MMapFileTest, SelfWrite) {
+ uintptr_t addr;
+ ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0),
+ SyscallSucceeds());
+ EXPECT_THAT(Write(reinterpret_cast<char*>(addr), kPageSize / 2),
+ SyscallSucceedsWithValue(kPageSize / 2));
+ // The resulting file contents are poorly-specified and irrelevant.
+}
+
+TEST(MMapDeathTest, TruncateAfterCOWBreak) {
+ SetupGvisorDeathTest();
+
+ // Create and map a single-page file.
+ auto const temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ auto const fd = ASSERT_NO_ERRNO_AND_VALUE(Open(temp_file.path(), O_RDWR));
+ ASSERT_THAT(ftruncate(fd.get(), kPageSize), SyscallSucceeds());
+ auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(Mmap(
+ nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd.get(), 0));
+
+ // Write to this mapping, causing the page to be copied for write.
+ memset(mapping.ptr(), 'a', mapping.len());
+ MaybeSave(); // Trigger a co-operative save cycle.
+
+ // Truncate the file and expect it to invalidate the copied page.
+ ASSERT_THAT(ftruncate(fd.get(), 0), SyscallSucceeds());
+ EXPECT_EXIT(*reinterpret_cast<volatile char*>(mapping.ptr()),
+ ::testing::KilledBySignal(SIGBUS), "");
+}
+
+// Conditional on MAP_32BIT.
+#ifdef __x86_64__
+
+TEST(MMapNoFixtureTest, Map32Bit) {
+ auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(
+ MmapAnon(kPageSize, PROT_NONE, MAP_PRIVATE | MAP_32BIT));
+ EXPECT_LT(mapping.addr(), static_cast<uintptr_t>(1) << 32);
+ EXPECT_LE(mapping.endaddr(), static_cast<uintptr_t>(1) << 32);
+}
+
+#endif // defined(__x86_64__)
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/mount.cc b/test/syscalls/linux/mount.cc
new file mode 100644
index 000000000..76da8b75a
--- /dev/null
+++ b/test/syscalls/linux/mount.cc
@@ -0,0 +1,302 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <functional>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/strings/string_view.h"
+#include "absl/time/time.h"
+#include "test/util/capability_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/mount_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(MountTest, MountBadFilesystem) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+
+ // Linux expects a valid target before it checks the file system name.
+ auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ EXPECT_THAT(mount("", dir.path().c_str(), "foobar", 0, ""),
+ SyscallFailsWithErrno(ENODEV));
+}
+
+TEST(MountTest, MountInvalidTarget) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+
+ auto const dir = NewTempAbsPath();
+ EXPECT_THAT(mount("", dir.c_str(), "tmpfs", 0, ""),
+ SyscallFailsWithErrno(ENOENT));
+}
+
+TEST(MountTest, MountPermDenied) {
+ // Clear CAP_SYS_ADMIN.
+ if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))) {
+ EXPECT_NO_ERRNO(SetCapability(CAP_SYS_ADMIN, false));
+ }
+
+ // Linux expects a valid target before checking capability.
+ auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ EXPECT_THAT(mount("", dir.path().c_str(), "", 0, ""),
+ SyscallFailsWithErrno(EPERM));
+}
+
+TEST(MountTest, UmountPermDenied) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+
+ auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ auto const mount =
+ ASSERT_NO_ERRNO_AND_VALUE(Mount("", dir.path(), "tmpfs", 0, "", 0));
+
+ // Drop privileges in another thread, so we can still unmount the mounted
+ // directory.
+ ScopedThread([&]() {
+ EXPECT_NO_ERRNO(SetCapability(CAP_SYS_ADMIN, false));
+ EXPECT_THAT(umount(dir.path().c_str()), SyscallFailsWithErrno(EPERM));
+ });
+}
+
+TEST(MountTest, MountOverBusy) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+
+ auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ auto const fd = ASSERT_NO_ERRNO_AND_VALUE(
+ Open(JoinPath(dir.path(), "foo"), O_CREAT | O_RDWR, 0777));
+
+ // Should be able to mount over a busy directory.
+ ASSERT_NO_ERRNO_AND_VALUE(Mount("", dir.path(), "tmpfs", 0, "", 0));
+}
+
+TEST(MountTest, OpenFileBusy) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+
+ auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ auto const mount = ASSERT_NO_ERRNO_AND_VALUE(
+ Mount("", dir.path(), "tmpfs", 0, "mode=0700", 0));
+ auto const fd = ASSERT_NO_ERRNO_AND_VALUE(
+ Open(JoinPath(dir.path(), "foo"), O_CREAT | O_RDWR, 0777));
+
+ // An open file should prevent unmounting.
+ EXPECT_THAT(umount(dir.path().c_str()), SyscallFailsWithErrno(EBUSY));
+}
+
+TEST(MountTest, UmountDetach) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+
+ // structure:
+ //
+ // dir (mount point)
+ // subdir
+ // file
+ //
+ // We show that we can walk around in the mount after detach-unmount dir.
+ //
+ // We show that even though dir is unreachable from outside the mount, we can
+ // still reach dir's (former) parent!
+ auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+ const struct stat before = ASSERT_NO_ERRNO_AND_VALUE(Stat(dir.path()));
+ auto mount =
+ ASSERT_NO_ERRNO_AND_VALUE(Mount("", dir.path(), "tmpfs", 0, "mode=0700",
+ /* umountflags= */ MNT_DETACH));
+ const struct stat after = ASSERT_NO_ERRNO_AND_VALUE(Stat(dir.path()));
+ EXPECT_NE(before.st_ino, after.st_ino);
+
+ // Create files in the new mount.
+ constexpr char kContents[] = "no no no";
+ auto const subdir =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir.path()));
+ auto const file = ASSERT_NO_ERRNO_AND_VALUE(
+ TempPath::CreateFileWith(dir.path(), kContents, 0777));
+
+ auto const dir_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(subdir.path(), O_RDONLY | O_DIRECTORY));
+ auto const fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY));
+
+ // Unmount the tmpfs.
+ mount.Release()();
+
+ const struct stat after2 = ASSERT_NO_ERRNO_AND_VALUE(Stat(dir.path()));
+ EXPECT_EQ(before.st_ino, after2.st_ino);
+
+ // Can still read file after unmounting.
+ std::vector<char> buf(sizeof(kContents));
+ EXPECT_THAT(ReadFd(fd.get(), buf.data(), buf.size()), SyscallSucceeds());
+
+ // Walk to dir.
+ auto const mounted_dir = ASSERT_NO_ERRNO_AND_VALUE(
+ OpenAt(dir_fd.get(), "..", O_DIRECTORY | O_RDONLY));
+ // Walk to dir/file.
+ auto const fd_again = ASSERT_NO_ERRNO_AND_VALUE(
+ OpenAt(mounted_dir.get(), std::string(Basename(file.path())), O_RDONLY));
+
+ std::vector<char> buf2(sizeof(kContents));
+ EXPECT_THAT(ReadFd(fd_again.get(), buf2.data(), buf2.size()),
+ SyscallSucceeds());
+ EXPECT_EQ(buf, buf2);
+
+ // Walking outside the unmounted realm should still work, too!
+ auto const dir_parent = ASSERT_NO_ERRNO_AND_VALUE(
+ OpenAt(mounted_dir.get(), "..", O_DIRECTORY | O_RDONLY));
+}
+
+TEST(MountTest, ActiveSubmountBusy) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+
+ auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ auto const mount1 = ASSERT_NO_ERRNO_AND_VALUE(
+ Mount("", dir.path(), "tmpfs", 0, "mode=0700", 0));
+
+ auto const dir2 =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir.path()));
+ auto const mount2 =
+ ASSERT_NO_ERRNO_AND_VALUE(Mount("", dir2.path(), "tmpfs", 0, "", 0));
+
+ // Since dir now has an active submount, should not be able to unmount.
+ EXPECT_THAT(umount(dir.path().c_str()), SyscallFailsWithErrno(EBUSY));
+}
+
+TEST(MountTest, MountTmpfs) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+
+ auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const struct stat before = ASSERT_NO_ERRNO_AND_VALUE(Stat(dir.path()));
+
+ {
+ auto const mount = ASSERT_NO_ERRNO_AND_VALUE(
+ Mount("", dir.path(), "tmpfs", 0, "mode=0700", 0));
+
+ const struct stat s = ASSERT_NO_ERRNO_AND_VALUE(Stat(dir.path()));
+ EXPECT_EQ(s.st_mode, S_IFDIR | 0700);
+ EXPECT_NE(s.st_ino, before.st_ino);
+
+ EXPECT_NO_ERRNO(Open(JoinPath(dir.path(), "foo"), O_CREAT | O_RDWR, 0777));
+ }
+
+ // Now that dir is unmounted again, we should have the old inode back.
+ const struct stat after = ASSERT_NO_ERRNO_AND_VALUE(Stat(dir.path()));
+ EXPECT_EQ(before.st_ino, after.st_ino);
+}
+
+TEST(MountTest, MountTmpfsMagicValIgnored) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+
+ auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+ auto const mount = ASSERT_NO_ERRNO_AND_VALUE(
+ Mount("", dir.path(), "tmpfs", MS_MGC_VAL, "mode=0700", 0));
+}
+
+// Passing nullptr to data is equivalent to "".
+TEST(MountTest, NullData) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+
+ auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+ EXPECT_THAT(mount("", dir.path().c_str(), "tmpfs", 0, nullptr),
+ SyscallSucceeds());
+ EXPECT_THAT(umount2(dir.path().c_str(), 0), SyscallSucceeds());
+}
+
+TEST(MountTest, MountReadonly) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+
+ auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ auto const mount = ASSERT_NO_ERRNO_AND_VALUE(
+ Mount("", dir.path(), "tmpfs", MS_RDONLY, "mode=0777", 0));
+
+ const struct stat s = ASSERT_NO_ERRNO_AND_VALUE(Stat(dir.path()));
+ EXPECT_EQ(s.st_mode, S_IFDIR | 0777);
+
+ std::string const filename = JoinPath(dir.path(), "foo");
+ EXPECT_THAT(open(filename.c_str(), O_RDWR | O_CREAT, 0777),
+ SyscallFailsWithErrno(EROFS));
+}
+
+PosixErrorOr<absl::Time> ATime(absl::string_view file) {
+ struct stat s = {};
+ if (stat(std::string(file).c_str(), &s) == -1) {
+ return PosixError(errno, "stat failed");
+ }
+ return absl::TimeFromTimespec(s.st_atim);
+}
+
+// FIXME: Disabled until tmpfs stops using Handle, as only the gofer
+// and host file system respect the MS_NOATIME flag.
+TEST(MountTest, DISABLED_MountNoAtime) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+
+ auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ auto const mount = ASSERT_NO_ERRNO_AND_VALUE(
+ Mount("", dir.path(), "tmpfs", MS_NOATIME, "mode=0777", 0));
+
+ std::string const contents = "No no no, don't follow the instructions!";
+ auto const file = ASSERT_NO_ERRNO_AND_VALUE(
+ TempPath::CreateFileWith(dir.path(), contents, 0777));
+
+ absl::Time const before = ASSERT_NO_ERRNO_AND_VALUE(ATime(file.path()));
+
+ // Reading from the file should change the atime, but the MS_NOATIME flag
+ // should prevent that.
+ auto const fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR));
+ char buf[100];
+ int read_n;
+ ASSERT_THAT(read_n = read(fd.get(), buf, sizeof(buf)), SyscallSucceeds());
+ EXPECT_EQ(std::string(buf, read_n), contents);
+
+ absl::Time const after = ASSERT_NO_ERRNO_AND_VALUE(ATime(file.path()));
+
+ // Expect that atime hasn't changed.
+ EXPECT_EQ(before, after);
+}
+
+TEST(MountTest, RenameRemoveMountPoint) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+
+ auto const dir_parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ auto const dir =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir_parent.path()));
+ auto const new_dir = NewTempAbsPath();
+
+ auto const mount =
+ ASSERT_NO_ERRNO_AND_VALUE(Mount("", dir.path(), "tmpfs", 0, "", 0));
+
+ ASSERT_THAT(rename(dir.path().c_str(), new_dir.c_str()),
+ SyscallFailsWithErrno(EBUSY));
+
+ ASSERT_THAT(rmdir(dir.path().c_str()), SyscallFailsWithErrno(EBUSY));
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/mremap.cc b/test/syscalls/linux/mremap.cc
new file mode 100644
index 000000000..ededab336
--- /dev/null
+++ b/test/syscalls/linux/mremap.cc
@@ -0,0 +1,514 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <string.h>
+#include <sys/mman.h>
+
+#include <string>
+
+#include "gmock/gmock.h"
+#include "absl/strings/string_view.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/logging.h"
+#include "test/util/memory_util.h"
+#include "test/util/multiprocess_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+using ::testing::_;
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Wrapper for mremap that returns a PosixErrorOr<>, since the return type of
+// void* isn't directly compatible with SyscallSucceeds.
+PosixErrorOr<void*> Mremap(void* old_address, size_t old_size, size_t new_size,
+ int flags, void* new_address) {
+ void* rv = mremap(old_address, old_size, new_size, flags, new_address);
+ if (rv == MAP_FAILED) {
+ return PosixError(errno, "mremap failed");
+ }
+ return rv;
+}
+
+// Returns true if the page containing addr is mapped.
+bool IsMapped(uintptr_t addr) {
+ int const rv = msync(reinterpret_cast<void*>(addr & ~(kPageSize - 1)),
+ kPageSize, MS_ASYNC);
+ if (rv == 0) {
+ return true;
+ }
+ TEST_PCHECK_MSG(errno == ENOMEM, "msync failed with unexpected errno");
+ return false;
+}
+
+// Fixture for mremap tests parameterized by mmap flags.
+using MremapParamTest = ::testing::TestWithParam<int>;
+
+TEST_P(MremapParamTest, Noop) {
+ Mapping const m =
+ ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, GetParam()));
+
+ ASSERT_THAT(Mremap(m.ptr(), kPageSize, kPageSize, 0, nullptr),
+ IsPosixErrorOkAndHolds(m.ptr()));
+ EXPECT_TRUE(IsMapped(m.addr()));
+}
+
+TEST_P(MremapParamTest, InPlace_ShrinkingWholeVMA) {
+ Mapping const m =
+ ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(2 * kPageSize, PROT_NONE, GetParam()));
+
+ const auto rest = [&] {
+ // N.B. we must be in a single-threaded subprocess to ensure a
+ // background thread doesn't concurrently map the second page.
+ void* addr = mremap(m.ptr(), 2 * kPageSize, kPageSize, 0, nullptr);
+ TEST_PCHECK_MSG(addr != MAP_FAILED, "mremap failed");
+ TEST_CHECK(addr == m.ptr());
+ MaybeSave();
+
+ TEST_CHECK(IsMapped(m.addr()));
+ TEST_CHECK(!IsMapped(m.addr() + kPageSize));
+ };
+
+ EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
+}
+
+TEST_P(MremapParamTest, InPlace_ShrinkingPartialVMA) {
+ Mapping const m =
+ ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(3 * kPageSize, PROT_NONE, GetParam()));
+
+ const auto rest = [&] {
+ void* addr = mremap(m.ptr(), 2 * kPageSize, kPageSize, 0, nullptr);
+ TEST_PCHECK_MSG(addr != MAP_FAILED, "mremap failed");
+ TEST_CHECK(addr == m.ptr());
+ MaybeSave();
+
+ TEST_CHECK(IsMapped(m.addr()));
+ TEST_CHECK(!IsMapped(m.addr() + kPageSize));
+ TEST_CHECK(IsMapped(m.addr() + 2 * kPageSize));
+ };
+
+ EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
+}
+
+TEST_P(MremapParamTest, InPlace_ShrinkingAcrossVMAs) {
+ Mapping const m =
+ ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(3 * kPageSize, PROT_READ, GetParam()));
+ // Changing permissions on the first page forces it to become a separate vma.
+ ASSERT_THAT(mprotect(m.ptr(), kPageSize, PROT_NONE), SyscallSucceeds());
+
+ const auto rest = [&] {
+ // Both old_size and new_size now span two vmas; mremap
+ // shouldn't care.
+ void* addr = mremap(m.ptr(), 3 * kPageSize, 2 * kPageSize, 0, nullptr);
+ TEST_PCHECK_MSG(addr != MAP_FAILED, "mremap failed");
+ TEST_CHECK(addr == m.ptr());
+ MaybeSave();
+
+ TEST_CHECK(IsMapped(m.addr()));
+ TEST_CHECK(IsMapped(m.addr() + kPageSize));
+ TEST_CHECK(!IsMapped(m.addr() + 2 * kPageSize));
+ };
+
+ EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
+}
+
+TEST_P(MremapParamTest, InPlace_ExpansionSuccess) {
+ Mapping const m =
+ ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(2 * kPageSize, PROT_NONE, GetParam()));
+
+ const auto rest = [&] {
+ // Unmap the second page so that the first can be expanded back into it.
+ //
+ // N.B. we must be in a single-threaded subprocess to ensure a
+ // background thread doesn't concurrently map this page.
+ TEST_PCHECK(
+ munmap(reinterpret_cast<void*>(m.addr() + kPageSize), kPageSize) == 0);
+ MaybeSave();
+
+ void* addr = mremap(m.ptr(), kPageSize, 2 * kPageSize, 0, nullptr);
+ TEST_PCHECK_MSG(addr != MAP_FAILED, "mremap failed");
+ TEST_CHECK(addr == m.ptr());
+ MaybeSave();
+
+ TEST_CHECK(IsMapped(m.addr()));
+ TEST_CHECK(IsMapped(m.addr() + kPageSize));
+ };
+
+ EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
+}
+
+TEST_P(MremapParamTest, InPlace_ExpansionFailure) {
+ Mapping const m =
+ ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(3 * kPageSize, PROT_NONE, GetParam()));
+
+ const auto rest = [&] {
+ // Unmap the second page, leaving a one-page hole. Trying to expand the
+ // first page to three pages should fail since the original third page
+ // is still mapped.
+ TEST_PCHECK(
+ munmap(reinterpret_cast<void*>(m.addr() + kPageSize), kPageSize) == 0);
+ MaybeSave();
+
+ void* addr = mremap(m.ptr(), kPageSize, 3 * kPageSize, 0, nullptr);
+ TEST_CHECK_MSG(addr == MAP_FAILED, "mremap unexpectedly succeeded");
+ TEST_PCHECK_MSG(errno == ENOMEM, "mremap failed with wrong errno");
+ MaybeSave();
+
+ TEST_CHECK(IsMapped(m.addr()));
+ TEST_CHECK(!IsMapped(m.addr() + kPageSize));
+ TEST_CHECK(IsMapped(m.addr() + 2 * kPageSize));
+ };
+
+ EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
+}
+
+TEST_P(MremapParamTest, MayMove_Expansion) {
+ Mapping const m =
+ ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(3 * kPageSize, PROT_NONE, GetParam()));
+
+ const auto rest = [&] {
+ // Unmap the second page, leaving a one-page hole. Trying to expand the
+ // first page to three pages with MREMAP_MAYMOVE should force the
+ // mapping to be relocated since the original third page is still
+ // mapped.
+ TEST_PCHECK(
+ munmap(reinterpret_cast<void*>(m.addr() + kPageSize), kPageSize) == 0);
+ MaybeSave();
+
+ void* addr2 =
+ mremap(m.ptr(), kPageSize, 3 * kPageSize, MREMAP_MAYMOVE, nullptr);
+ TEST_PCHECK_MSG(addr2 != MAP_FAILED, "mremap failed");
+ MaybeSave();
+
+ const Mapping m2 = Mapping(addr2, 3 * kPageSize);
+ TEST_CHECK(m.addr() != m2.addr());
+
+ TEST_CHECK(!IsMapped(m.addr()));
+ TEST_CHECK(!IsMapped(m.addr() + kPageSize));
+ TEST_CHECK(IsMapped(m.addr() + 2 * kPageSize));
+ TEST_CHECK(IsMapped(m2.addr()));
+ TEST_CHECK(IsMapped(m2.addr() + kPageSize));
+ TEST_CHECK(IsMapped(m2.addr() + 2 * kPageSize));
+ };
+
+ EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
+}
+
+TEST_P(MremapParamTest, Fixed_SourceAndDestinationCannotOverlap) {
+ Mapping const m =
+ ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, GetParam()));
+
+ ASSERT_THAT(Mremap(m.ptr(), kPageSize, kPageSize,
+ MREMAP_MAYMOVE | MREMAP_FIXED, m.ptr()),
+ PosixErrorIs(EINVAL, _));
+ EXPECT_TRUE(IsMapped(m.addr()));
+}
+
+TEST_P(MremapParamTest, Fixed_SameSize) {
+ Mapping const src =
+ ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, GetParam()));
+ Mapping const dst =
+ ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, GetParam()));
+
+ const auto rest = [&] {
+ // Unmap dst to create a hole.
+ TEST_PCHECK(munmap(dst.ptr(), kPageSize) == 0);
+ MaybeSave();
+
+ void* addr = mremap(src.ptr(), kPageSize, kPageSize,
+ MREMAP_MAYMOVE | MREMAP_FIXED, dst.ptr());
+ TEST_PCHECK_MSG(addr != MAP_FAILED, "mremap failed");
+ TEST_CHECK(addr == dst.ptr());
+ MaybeSave();
+
+ TEST_CHECK(!IsMapped(src.addr()));
+ TEST_CHECK(IsMapped(dst.addr()));
+ };
+
+ EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
+}
+
+TEST_P(MremapParamTest, Fixed_SameSize_Unmapping) {
+ // Like the Fixed_SameSize case, but expect mremap to unmap the destination
+ // automatically.
+ Mapping const src =
+ ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, GetParam()));
+ Mapping const dst =
+ ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, GetParam()));
+
+ const auto rest = [&] {
+ void* addr = mremap(src.ptr(), kPageSize, kPageSize,
+ MREMAP_MAYMOVE | MREMAP_FIXED, dst.ptr());
+ TEST_PCHECK_MSG(addr != MAP_FAILED, "mremap failed");
+ TEST_CHECK(addr == dst.ptr());
+ MaybeSave();
+
+ TEST_CHECK(!IsMapped(src.addr()));
+ TEST_CHECK(IsMapped(dst.addr()));
+ };
+
+ EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
+}
+
+TEST_P(MremapParamTest, Fixed_ShrinkingWholeVMA) {
+ Mapping const src =
+ ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(2 * kPageSize, PROT_NONE, GetParam()));
+ Mapping const dst =
+ ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(2 * kPageSize, PROT_NONE, GetParam()));
+
+ const auto rest = [&] {
+ // Unmap dst so we can check that mremap does not keep the
+ // second page.
+ TEST_PCHECK(munmap(dst.ptr(), 2 * kPageSize) == 0);
+ MaybeSave();
+
+ void* addr = mremap(src.ptr(), 2 * kPageSize, kPageSize,
+ MREMAP_MAYMOVE | MREMAP_FIXED, dst.ptr());
+ TEST_PCHECK_MSG(addr != MAP_FAILED, "mremap failed");
+ TEST_CHECK(addr == dst.ptr());
+ MaybeSave();
+
+ TEST_CHECK(!IsMapped(src.addr()));
+ TEST_CHECK(!IsMapped(src.addr() + kPageSize));
+ TEST_CHECK(IsMapped(dst.addr()));
+ TEST_CHECK(!IsMapped(dst.addr() + kPageSize));
+ };
+
+ EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
+}
+
+TEST_P(MremapParamTest, Fixed_ShrinkingPartialVMA) {
+ Mapping const src =
+ ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(3 * kPageSize, PROT_NONE, GetParam()));
+ Mapping const dst =
+ ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(2 * kPageSize, PROT_NONE, GetParam()));
+
+ const auto rest = [&] {
+ // Unmap dst so we can check that mremap does not keep the
+ // second page.
+ TEST_PCHECK(munmap(dst.ptr(), 2 * kPageSize) == 0);
+ MaybeSave();
+
+ void* addr = mremap(src.ptr(), 2 * kPageSize, kPageSize,
+ MREMAP_MAYMOVE | MREMAP_FIXED, dst.ptr());
+ TEST_PCHECK_MSG(addr != MAP_FAILED, "mremap failed");
+ TEST_CHECK(addr == dst.ptr());
+ MaybeSave();
+
+ TEST_CHECK(!IsMapped(src.addr()));
+ TEST_CHECK(!IsMapped(src.addr() + kPageSize));
+ TEST_CHECK(IsMapped(src.addr() + 2 * kPageSize));
+ TEST_CHECK(IsMapped(dst.addr()));
+ TEST_CHECK(!IsMapped(dst.addr() + kPageSize));
+ };
+
+ EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
+}
+
+TEST_P(MremapParamTest, Fixed_ShrinkingAcrossVMAs) {
+ Mapping const src =
+ ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(3 * kPageSize, PROT_READ, GetParam()));
+ // Changing permissions on the first page forces it to become a separate vma.
+ ASSERT_THAT(mprotect(src.ptr(), kPageSize, PROT_NONE), SyscallSucceeds());
+ Mapping const dst =
+ ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(2 * kPageSize, PROT_NONE, GetParam()));
+
+ const auto rest = [&] {
+ // Unlike flags=0, MREMAP_FIXED requires that [old_address,
+ // old_address+new_size) only spans a single vma.
+ void* addr = mremap(src.ptr(), 3 * kPageSize, 2 * kPageSize,
+ MREMAP_MAYMOVE | MREMAP_FIXED, dst.ptr());
+ TEST_CHECK_MSG(addr == MAP_FAILED, "mremap unexpectedly succeeded");
+ TEST_PCHECK_MSG(errno == EFAULT, "mremap failed with wrong errno");
+ MaybeSave();
+
+ TEST_CHECK(IsMapped(src.addr()));
+ TEST_CHECK(IsMapped(src.addr() + kPageSize));
+ // Despite failing, mremap should have unmapped [old_address+new_size,
+ // old_address+old_size) (i.e. the third page).
+ TEST_CHECK(!IsMapped(src.addr() + 2 * kPageSize));
+ // Despite failing, mremap should have unmapped the destination pages.
+ TEST_CHECK(!IsMapped(dst.addr()));
+ TEST_CHECK(!IsMapped(dst.addr() + kPageSize));
+ };
+
+ EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
+}
+
+TEST_P(MremapParamTest, Fixed_Expansion) {
+ Mapping const src =
+ ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, GetParam()));
+ Mapping const dst =
+ ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(2 * kPageSize, PROT_NONE, GetParam()));
+
+ const auto rest = [&] {
+ // Unmap dst so we can check that mremap actually maps all pages
+ // at the destination.
+ TEST_PCHECK(munmap(dst.ptr(), 2 * kPageSize) == 0);
+ MaybeSave();
+
+ void* addr = mremap(src.ptr(), kPageSize, 2 * kPageSize,
+ MREMAP_MAYMOVE | MREMAP_FIXED, dst.ptr());
+ TEST_PCHECK_MSG(addr != MAP_FAILED, "mremap failed");
+ TEST_CHECK(addr == dst.ptr());
+ MaybeSave();
+
+ TEST_CHECK(!IsMapped(src.addr()));
+ TEST_CHECK(IsMapped(dst.addr()));
+ TEST_CHECK(IsMapped(dst.addr() + kPageSize));
+ };
+
+ EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
+}
+
+INSTANTIATE_TEST_CASE_P(PrivateShared, MremapParamTest,
+ ::testing::Values(MAP_PRIVATE, MAP_SHARED));
+
+// mremap with old_size == 0 only works with MAP_SHARED after Linux 4.14
+// (dba58d3b8c50 "mm/mremap: fail map duplication attempts for private
+// mappings").
+
+TEST(MremapTest, InPlace_Copy) {
+ Mapping const m =
+ ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, MAP_SHARED));
+ EXPECT_THAT(Mremap(m.ptr(), 0, kPageSize, 0, nullptr),
+ PosixErrorIs(ENOMEM, _));
+}
+
+TEST(MremapTest, MayMove_Copy) {
+ Mapping const m =
+ ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, MAP_SHARED));
+
+ // Remainder of this test executes in a subprocess to ensure that if mremap
+ // incorrectly removes m, it is not remapped by another thread.
+ const auto rest = [&] {
+ void* ptr = mremap(m.ptr(), 0, kPageSize, MREMAP_MAYMOVE, nullptr);
+ MaybeSave();
+ TEST_PCHECK_MSG(ptr != MAP_FAILED, "mremap failed");
+ TEST_CHECK(ptr != m.ptr());
+ TEST_CHECK(IsMapped(m.addr()));
+ TEST_CHECK(IsMapped(reinterpret_cast<uintptr_t>(ptr)));
+ };
+ EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
+}
+
+TEST(MremapTest, MustMove_Copy) {
+ Mapping const src =
+ ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, MAP_SHARED));
+ Mapping const dst =
+ ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, MAP_PRIVATE));
+
+ // Remainder of this test executes in a subprocess to ensure that if mremap
+ // incorrectly removes src, it is not remapped by another thread.
+ const auto rest = [&] {
+ void* ptr = mremap(src.ptr(), 0, kPageSize, MREMAP_MAYMOVE | MREMAP_FIXED,
+ dst.ptr());
+ MaybeSave();
+ TEST_PCHECK_MSG(ptr != MAP_FAILED, "mremap failed");
+ TEST_CHECK(ptr == dst.ptr());
+ TEST_CHECK(IsMapped(src.addr()));
+ TEST_CHECK(IsMapped(dst.addr()));
+ };
+ EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
+}
+
+void ExpectAllBytesAre(absl::string_view v, char c) {
+ for (size_t i = 0; i < v.size(); i++) {
+ ASSERT_EQ(v[i], c) << "at offset " << i;
+ }
+}
+
+TEST(MremapTest, ExpansionPreservesCOWPagesAndExposesNewFilePages) {
+ // Create a file with 3 pages. The first is filled with 'a', the second is
+ // filled with 'b', and the third is filled with 'c'.
+ TempPath const file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR));
+ ASSERT_THAT(WriteFd(fd.get(), std::string(kPageSize, 'a').c_str(), kPageSize),
+ SyscallSucceedsWithValue(kPageSize));
+ ASSERT_THAT(WriteFd(fd.get(), std::string(kPageSize, 'b').c_str(), kPageSize),
+ SyscallSucceedsWithValue(kPageSize));
+ ASSERT_THAT(WriteFd(fd.get(), std::string(kPageSize, 'c').c_str(), kPageSize),
+ SyscallSucceedsWithValue(kPageSize));
+
+ // Create a private mapping of the first 2 pages, and fill the second page
+ // with 'd'.
+ Mapping const src = ASSERT_NO_ERRNO_AND_VALUE(Mmap(nullptr, 2 * kPageSize,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE, fd.get(), 0));
+ memset(reinterpret_cast<void*>(src.addr() + kPageSize), 'd', kPageSize);
+ MaybeSave();
+
+ // Move the mapping while expanding it to 3 pages. The resulting mapping
+ // should contain the original first page of the file (filled with 'a'),
+ // followed by the private copy of the second page (filled with 'd'), followed
+ // by the newly-mapped third page of the file (filled with 'c').
+ Mapping const dst = ASSERT_NO_ERRNO_AND_VALUE(
+ MmapAnon(3 * kPageSize, PROT_NONE, MAP_PRIVATE));
+ ASSERT_THAT(Mremap(src.ptr(), 2 * kPageSize, 3 * kPageSize,
+ MREMAP_MAYMOVE | MREMAP_FIXED, dst.ptr()),
+ IsPosixErrorOkAndHolds(dst.ptr()));
+ auto const v = dst.view();
+ ExpectAllBytesAre(v.substr(0, kPageSize), 'a');
+ ExpectAllBytesAre(v.substr(kPageSize, kPageSize), 'd');
+ ExpectAllBytesAre(v.substr(2 * kPageSize, kPageSize), 'c');
+}
+
+TEST(MremapDeathTest, SharedAnon) {
+ SetupGvisorDeathTest();
+
+ // Reserve 4 pages of address space.
+ Mapping const reserved = ASSERT_NO_ERRNO_AND_VALUE(
+ MmapAnon(4 * kPageSize, PROT_NONE, MAP_PRIVATE));
+
+ // Create a 2-page shared anonymous mapping at the beginning of the
+ // reservation. Fill the first page with 'a' and the second with 'b'.
+ Mapping const m = ASSERT_NO_ERRNO_AND_VALUE(
+ Mmap(reserved.ptr(), 2 * kPageSize, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0));
+ memset(m.ptr(), 'a', kPageSize);
+ memset(reinterpret_cast<void*>(m.addr() + kPageSize), 'b', kPageSize);
+ MaybeSave();
+
+ // Shrink the mapping to 1 page in-place.
+ ASSERT_THAT(Mremap(m.ptr(), 2 * kPageSize, kPageSize, 0, m.ptr()),
+ IsPosixErrorOkAndHolds(m.ptr()));
+
+ // Expand the mapping to 3 pages, moving it forward by 1 page in the process
+ // since the old and new mappings can't overlap.
+ void* const new_m = reinterpret_cast<void*>(m.addr() + kPageSize);
+ ASSERT_THAT(Mremap(m.ptr(), kPageSize, 3 * kPageSize,
+ MREMAP_MAYMOVE | MREMAP_FIXED, new_m),
+ IsPosixErrorOkAndHolds(new_m));
+
+ // The first 2 pages of the mapping should still contain the data we wrote
+ // (i.e. shrinking should not have discarded the second page's data), while
+ // touching the third page should raise SIGBUS.
+ auto const v =
+ absl::string_view(static_cast<char const*>(new_m), 3 * kPageSize);
+ ExpectAllBytesAre(v.substr(0, kPageSize), 'a');
+ ExpectAllBytesAre(v.substr(kPageSize, kPageSize), 'b');
+ EXPECT_EXIT(ExpectAllBytesAre(v.substr(2 * kPageSize, kPageSize), '\0'),
+ ::testing::KilledBySignal(SIGBUS), "");
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/msync.cc b/test/syscalls/linux/msync.cc
new file mode 100644
index 000000000..0ddc621aa
--- /dev/null
+++ b/test/syscalls/linux/msync.cc
@@ -0,0 +1,145 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include <functional>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "test/util/file_descriptor.h"
+#include "test/util/memory_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Parameters for msync tests. Use a std::tuple so we can use
+// ::testing::Combine.
+using MsyncTestParam =
+ std::tuple<int, // msync flags
+ std::function<PosixErrorOr<Mapping>()> // returns mapping to
+ // msync
+ >;
+
+class MsyncParameterizedTest : public ::testing::TestWithParam<MsyncTestParam> {
+ protected:
+ int msync_flags() const { return std::get<0>(GetParam()); }
+
+ PosixErrorOr<Mapping> GetMapping() const {
+ auto rv = std::get<1>(GetParam())();
+ return rv;
+ }
+};
+
+// All valid msync(2) flag combinations (not including MS_INVALIDATE, which
+// gVisor doesn't implement).
+constexpr std::initializer_list<int> kMsyncFlags = {MS_SYNC, MS_ASYNC, 0};
+
+// Returns functions that return mappings that should be successfully
+// msync()able.
+std::vector<std::function<PosixErrorOr<Mapping>()>> SyncableMappings() {
+ std::vector<std::function<PosixErrorOr<Mapping>()>> funcs;
+ for (bool const writable : {false, true}) {
+ for (int const mflags : {MAP_PRIVATE, MAP_SHARED}) {
+ int const prot = PROT_READ | (writable ? PROT_WRITE : 0);
+ int const oflags = O_CREAT | (writable ? O_RDWR : O_RDONLY);
+ funcs.push_back([=] {
+ return MmapAnon(kPageSize, prot, mflags);
+ });
+ funcs.push_back([=]() -> PosixErrorOr<Mapping> {
+ std::string const path = NewTempAbsPath();
+ ASSIGN_OR_RETURN_ERRNO(auto fd, Open(path, oflags, 0644));
+ // Don't unlink the file since that breaks save/restore. Just let the
+ // test infrastructure clean up all of our temporary files when we're
+ // done.
+ return Mmap(nullptr, kPageSize, prot, mflags, fd.get(), 0);
+ });
+ }
+ }
+ return funcs;
+}
+
+PosixErrorOr<Mapping> NoMappings() {
+ return PosixError(EINVAL, "unexpected attempt to create a mapping");
+}
+
+// "Fixture" for msync tests that hold for all valid flags, but do not create
+// mappings.
+using MsyncNoMappingTest = MsyncParameterizedTest;
+
+TEST_P(MsyncNoMappingTest, UnmappedAddressWithZeroLengthSucceeds) {
+ EXPECT_THAT(msync(nullptr, 0, msync_flags()), SyscallSucceeds());
+}
+
+TEST_P(MsyncNoMappingTest, UnmappedAddressWithNonzeroLengthFails) {
+ EXPECT_THAT(msync(nullptr, kPageSize, msync_flags()),
+ SyscallFailsWithErrno(ENOMEM));
+}
+
+INSTANTIATE_TEST_CASE_P(All, MsyncNoMappingTest,
+ ::testing::Combine(::testing::ValuesIn(kMsyncFlags),
+ ::testing::Values(NoMappings)));
+
+// "Fixture" for msync tests that are not parameterized by msync flags, but do
+// create mappings.
+using MsyncNoFlagsTest = MsyncParameterizedTest;
+
+TEST_P(MsyncNoFlagsTest, BothSyncAndAsyncFails) {
+ auto m = ASSERT_NO_ERRNO_AND_VALUE(GetMapping());
+ EXPECT_THAT(msync(m.ptr(), m.len(), MS_SYNC | MS_ASYNC),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+INSTANTIATE_TEST_CASE_P(
+ All, MsyncNoFlagsTest,
+ ::testing::Combine(::testing::Values(0), // ignored
+ ::testing::ValuesIn(SyncableMappings())));
+
+// "Fixture" for msync tests parameterized by both msync flags and sources of
+// mappings.
+using MsyncFullParamTest = MsyncParameterizedTest;
+
+TEST_P(MsyncFullParamTest, NormallySucceeds) {
+ auto m = ASSERT_NO_ERRNO_AND_VALUE(GetMapping());
+ EXPECT_THAT(msync(m.ptr(), m.len(), msync_flags()), SyscallSucceeds());
+}
+
+TEST_P(MsyncFullParamTest, UnalignedLengthSucceeds) {
+ auto m = ASSERT_NO_ERRNO_AND_VALUE(GetMapping());
+ EXPECT_THAT(msync(m.ptr(), m.len() - 1, msync_flags()), SyscallSucceeds());
+}
+
+TEST_P(MsyncFullParamTest, UnalignedAddressFails) {
+ auto m = ASSERT_NO_ERRNO_AND_VALUE(GetMapping());
+ EXPECT_THAT(
+ msync(reinterpret_cast<void*>(m.addr() + 1), m.len() - 1, msync_flags()),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+INSTANTIATE_TEST_CASE_P(
+ All, MsyncFullParamTest,
+ ::testing::Combine(::testing::ValuesIn(kMsyncFlags),
+ ::testing::ValuesIn(SyncableMappings())));
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/munmap.cc b/test/syscalls/linux/munmap.cc
new file mode 100644
index 000000000..e20039950
--- /dev/null
+++ b/test/syscalls/linux/munmap.cc
@@ -0,0 +1,53 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/mman.h>
+
+#include "gtest/gtest.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+class MunmapTest : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ m_ = mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(MAP_FAILED, m_);
+ }
+
+ void* m_ = nullptr;
+};
+
+TEST_F(MunmapTest, HappyCase) {
+ EXPECT_THAT(munmap(m_, kPageSize), SyscallSucceeds());
+}
+
+TEST_F(MunmapTest, ZeroLength) {
+ EXPECT_THAT(munmap(m_, 0), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_F(MunmapTest, LastPageRoundUp) {
+ // Attempt to unmap up to and including the last page.
+ EXPECT_THAT(munmap(m_, static_cast<size_t>(-kPageSize + 1)),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/open.cc b/test/syscalls/linux/open.cc
new file mode 100644
index 000000000..5770680cd
--- /dev/null
+++ b/test/syscalls/linux/open.cc
@@ -0,0 +1,340 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/capability.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/file_base.h"
+#include "test/util/capability_util.h"
+#include "test/util/cleanup.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// This test is currently very rudimentary.
+//
+// There are plenty of extra cases to cover once the sentry supports them.
+//
+// Different types of opens:
+// * O_CREAT
+// * O_DIRECTORY
+// * O_NOFOLLOW
+// * O_PATH <- Will we ever support this?
+//
+// Special operations on open:
+// * O_EXCL
+//
+// Special files:
+// * Blocking behavior for a named pipe.
+//
+// Different errors:
+// * EACCES
+// * EEXIST
+// * ENAMETOOLONG
+// * ELOOP
+// * ENOTDIR
+// * EPERM
+class OpenTest : public FileTest {
+ void SetUp() override {
+ FileTest::SetUp();
+
+ ASSERT_THAT(
+ write(test_file_fd_.get(), test_data_.c_str(), test_data_.length()),
+ SyscallSucceedsWithValue(test_data_.length()));
+ EXPECT_THAT(lseek(test_file_fd_.get(), 0, SEEK_SET), SyscallSucceeds());
+ }
+
+ public:
+ const std::string test_data_ = "hello world\n";
+};
+
+TEST_F(OpenTest, ReadOnly) {
+ char buf;
+ const FileDescriptor ro_file =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDONLY));
+
+ EXPECT_THAT(read(ro_file.get(), &buf, 1), SyscallSucceedsWithValue(1));
+ EXPECT_THAT(lseek(ro_file.get(), 0, SEEK_SET), SyscallSucceeds());
+ EXPECT_THAT(write(ro_file.get(), &buf, 1), SyscallFailsWithErrno(EBADF));
+}
+
+TEST_F(OpenTest, WriteOnly) {
+ char buf;
+ const FileDescriptor wo_file =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_WRONLY));
+
+ EXPECT_THAT(read(wo_file.get(), &buf, 1), SyscallFailsWithErrno(EBADF));
+ EXPECT_THAT(lseek(wo_file.get(), 0, SEEK_SET), SyscallSucceeds());
+ EXPECT_THAT(write(wo_file.get(), &buf, 1), SyscallSucceedsWithValue(1));
+}
+
+TEST_F(OpenTest, ReadWrite) {
+ char buf;
+ const FileDescriptor rw_file =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR));
+
+ EXPECT_THAT(read(rw_file.get(), &buf, 1), SyscallSucceedsWithValue(1));
+ EXPECT_THAT(lseek(rw_file.get(), 0, SEEK_SET), SyscallSucceeds());
+ EXPECT_THAT(write(rw_file.get(), &buf, 1), SyscallSucceedsWithValue(1));
+}
+
+TEST_F(OpenTest, RelPath) {
+ auto name = std::string(Basename(test_file_name_));
+
+ ASSERT_THAT(chdir(GetAbsoluteTestTmpdir().c_str()), SyscallSucceeds());
+ const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(name, O_RDONLY));
+}
+
+TEST_F(OpenTest, AbsPath) {
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDONLY));
+}
+
+TEST_F(OpenTest, AtRelPath) {
+ auto name = std::string(Basename(test_file_name_));
+ const FileDescriptor dirfd = ASSERT_NO_ERRNO_AND_VALUE(
+ Open(GetAbsoluteTestTmpdir(), O_RDONLY | O_DIRECTORY));
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(OpenAt(dirfd.get(), name, O_RDONLY));
+}
+
+TEST_F(OpenTest, AtAbsPath) {
+ const FileDescriptor dirfd = ASSERT_NO_ERRNO_AND_VALUE(
+ Open(GetAbsoluteTestTmpdir(), O_RDONLY | O_DIRECTORY));
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(OpenAt(dirfd.get(), test_file_name_, O_RDONLY));
+}
+
+TEST_F(OpenTest, OpenNoFollowSymlink) {
+ const std::string link_path = JoinPath(GetAbsoluteTestTmpdir(), "link");
+ ASSERT_THAT(symlink(test_file_name_.c_str(), link_path.c_str()),
+ SyscallSucceeds());
+ auto cleanup = Cleanup([link_path]() {
+ EXPECT_THAT(unlink(link_path.c_str()), SyscallSucceeds());
+ });
+
+ // Open will succeed without O_NOFOLLOW and fails with O_NOFOLLOW.
+ const FileDescriptor fd2 =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(link_path, O_RDONLY));
+ ASSERT_THAT(open(link_path.c_str(), O_RDONLY | O_NOFOLLOW),
+ SyscallFailsWithErrno(ELOOP));
+}
+
+TEST_F(OpenTest, OpenNoFollowStillFollowsLinksInPath) {
+ // We will create the following structure:
+ // tmp_folder/real_folder/file
+ // tmp_folder/sym_folder -> tmp_folder/real_folder
+ //
+ // We will then open tmp_folder/sym_folder/file with O_NOFOLLOW and it
+ // should succeed as O_NOFOLLOW only applies to the final path component.
+ auto tmp_path =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(GetAbsoluteTestTmpdir()));
+ auto sym_path = ASSERT_NO_ERRNO_AND_VALUE(
+ TempPath::CreateSymlinkTo(GetAbsoluteTestTmpdir(), tmp_path.path()));
+ auto file_path =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(tmp_path.path()));
+
+ auto path_via_symlink = JoinPath(sym_path.path(), Basename(file_path.path()));
+ const FileDescriptor fd2 =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(path_via_symlink, O_RDONLY | O_NOFOLLOW));
+}
+
+TEST_F(OpenTest, Fault) {
+ char* totally_not_null = nullptr;
+ ASSERT_THAT(open(totally_not_null, O_RDONLY), SyscallFailsWithErrno(EFAULT));
+}
+
+TEST_F(OpenTest, AppendOnly) {
+ // First write some data to the fresh file.
+ const int64_t kBufSize = 1024;
+ std::vector<char> buf(kBufSize, 'a');
+
+ FileDescriptor fd0 = ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR));
+
+ std::fill(buf.begin(), buf.end(), 'a');
+ EXPECT_THAT(WriteFd(fd0.get(), buf.data(), buf.size()),
+ SyscallSucceedsWithValue(buf.size()));
+ fd0.reset(); // Close the file early.
+
+ // Next get two handles to the same file. We open two files because we want
+ // to make sure that appending is respected between them.
+ const FileDescriptor fd1 =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR | O_APPEND));
+ EXPECT_THAT(lseek(fd1.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0));
+
+ const FileDescriptor fd2 =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR | O_APPEND));
+ EXPECT_THAT(lseek(fd2.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0));
+
+ // Then try to write to the first file and make sure the bytes are appended.
+ EXPECT_THAT(WriteFd(fd1.get(), buf.data(), buf.size()),
+ SyscallSucceedsWithValue(buf.size()));
+
+ // Check that the size of the file is correct and that the offset has been
+ // incremented to that size.
+ struct stat s0;
+ EXPECT_THAT(fstat(fd1.get(), &s0), SyscallSucceeds());
+ EXPECT_EQ(s0.st_size, kBufSize * 2);
+ EXPECT_THAT(lseek(fd1.get(), 0, SEEK_CUR),
+ SyscallSucceedsWithValue(kBufSize * 2));
+
+ // Then try to write to the second file and make sure the bytes are appended.
+ EXPECT_THAT(WriteFd(fd2.get(), buf.data(), buf.size()),
+ SyscallSucceedsWithValue(buf.size()));
+
+ // Check that the size of the file is correct and that the offset has been
+ // incremented to that size.
+ struct stat s1;
+ EXPECT_THAT(fstat(fd2.get(), &s1), SyscallSucceeds());
+ EXPECT_EQ(s1.st_size, kBufSize * 3);
+ EXPECT_THAT(lseek(fd2.get(), 0, SEEK_CUR),
+ SyscallSucceedsWithValue(kBufSize * 3));
+}
+
+TEST_F(OpenTest, Truncate) {
+ {
+ // First write some data to the new file and close it.
+ FileDescriptor fd0 =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_WRONLY));
+ std::vector<char> orig(10, 'a');
+ EXPECT_THAT(WriteFd(fd0.get(), orig.data(), orig.size()),
+ SyscallSucceedsWithValue(orig.size()));
+ }
+
+ // Then open with truncate and verify that offset is set to 0.
+ const FileDescriptor fd1 =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR | O_TRUNC));
+ EXPECT_THAT(lseek(fd1.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0));
+
+ // Then write less data to the file and ensure the old content is gone.
+ std::vector<char> want(5, 'b');
+ EXPECT_THAT(WriteFd(fd1.get(), want.data(), want.size()),
+ SyscallSucceedsWithValue(want.size()));
+
+ struct stat stat;
+ EXPECT_THAT(fstat(fd1.get(), &stat), SyscallSucceeds());
+ EXPECT_EQ(stat.st_size, want.size());
+ EXPECT_THAT(lseek(fd1.get(), 0, SEEK_CUR),
+ SyscallSucceedsWithValue(want.size()));
+
+ // Read the data and ensure only the latest write is in the file.
+ std::vector<char> got(want.size() + 1, 'c');
+ ASSERT_THAT(pread(fd1.get(), got.data(), got.size(), 0),
+ SyscallSucceedsWithValue(want.size()));
+ EXPECT_EQ(memcmp(want.data(), got.data(), want.size()), 0)
+ << "rbuf=" << got.data();
+ EXPECT_EQ(got.back(), 'c'); // Last byte should not have been modified.
+}
+
+TEST_F(OpenTest, NameTooLong) {
+ char buf[4097] = {};
+ memset(buf, 'a', 4097);
+ EXPECT_THAT(open(buf, O_RDONLY), SyscallFailsWithErrno(ENAMETOOLONG));
+}
+
+TEST_F(OpenTest, DotsFromRoot) {
+ const FileDescriptor rootfd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/", O_RDONLY | O_DIRECTORY));
+ const FileDescriptor other_rootfd =
+ ASSERT_NO_ERRNO_AND_VALUE(OpenAt(rootfd.get(), "..", O_RDONLY));
+}
+
+TEST_F(OpenTest, DirectoryWritableFails) {
+ ASSERT_THAT(open(GetAbsoluteTestTmpdir().c_str(), O_RDWR),
+ SyscallFailsWithErrno(EISDIR));
+}
+
+TEST_F(OpenTest, FileNotDirectory) {
+ // Create a file and try to open it with O_DIRECTORY.
+ auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ ASSERT_THAT(open(file.path().c_str(), O_RDONLY | O_DIRECTORY),
+ SyscallFailsWithErrno(ENOTDIR));
+}
+
+TEST_F(OpenTest, Null) {
+ char c = '\0';
+ ASSERT_THAT(open(&c, O_RDONLY), SyscallFailsWithErrno(ENOENT));
+}
+
+// NOTE: While the man pages specify that this behavior should be
+// undefined, Linux truncates the file on opening read only if we have write
+// permission, so we will too.
+TEST_F(OpenTest, CanTruncateReadOnly) {
+ const FileDescriptor fd1 =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDONLY | O_TRUNC));
+
+ struct stat stat;
+ EXPECT_THAT(fstat(fd1.get(), &stat), SyscallSucceeds());
+ EXPECT_EQ(stat.st_size, 0);
+}
+
+// If we don't have read permission on the file, opening with
+// O_TRUNC should fail.
+TEST_F(OpenTest, CanTruncateReadOnlyNoWritePermission) {
+ // Drop capabilities that allow us to override file permissions.
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+
+ const DisableSave ds; // Permissions are dropped.
+ ASSERT_THAT(chmod(test_file_name_.c_str(), S_IRUSR | S_IRGRP),
+ SyscallSucceeds());
+
+ ASSERT_THAT(open(test_file_name_.c_str(), O_RDONLY | O_TRUNC),
+ SyscallFailsWithErrno(EACCES));
+
+ const FileDescriptor fd1 =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDONLY));
+
+ struct stat stat;
+ EXPECT_THAT(fstat(fd1.get(), &stat), SyscallSucceeds());
+ EXPECT_EQ(stat.st_size, test_data_.size());
+}
+
+// If we don't have read permission but have write permission, opening O_WRONLY
+// and O_TRUNC should succeed.
+TEST_F(OpenTest, CanTruncateWriteOnlyNoReadPermission) {
+ const DisableSave ds; // Permissions are dropped.
+
+ EXPECT_THAT(fchmod(test_file_fd_.get(), S_IWUSR | S_IWGRP),
+ SyscallSucceeds());
+
+ const FileDescriptor fd1 =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_WRONLY | O_TRUNC));
+
+ EXPECT_THAT(fchmod(test_file_fd_.get(), S_IRUSR | S_IRGRP),
+ SyscallSucceeds());
+
+ const FileDescriptor fd2 =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDONLY));
+
+ struct stat stat;
+ EXPECT_THAT(fstat(fd2.get(), &stat), SyscallSucceeds());
+ EXPECT_EQ(stat.st_size, 0);
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/open_create.cc b/test/syscalls/linux/open_create.cc
new file mode 100644
index 000000000..b2cbd63d1
--- /dev/null
+++ b/test/syscalls/linux/open_create.cc
@@ -0,0 +1,130 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/temp_umask.h"
+#include "test/util/capability_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+TEST(CreateTest, TmpFile) {
+ int fd;
+ EXPECT_THAT(fd = open(JoinPath(GetAbsoluteTestTmpdir(), "a").c_str(),
+ O_RDWR | O_CREAT, 0666),
+ SyscallSucceeds());
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+}
+
+TEST(CreateTest, ExistingFile) {
+ int fd;
+ EXPECT_THAT(
+ fd = open(JoinPath(GetAbsoluteTestTmpdir(), "ExistingFile").c_str(),
+ O_RDWR | O_CREAT, 0666),
+ SyscallSucceeds());
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+
+ EXPECT_THAT(
+ fd = open(JoinPath(GetAbsoluteTestTmpdir(), "ExistingFile").c_str(),
+ O_RDWR | O_CREAT, 0666),
+ SyscallSucceeds());
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+}
+
+TEST(CreateTest, CreateAtFile) {
+ int dirfd;
+ EXPECT_THAT(dirfd = open(GetAbsoluteTestTmpdir().c_str(), O_DIRECTORY, 0666),
+ SyscallSucceeds());
+ EXPECT_THAT(openat(dirfd, "CreateAtFile", O_RDWR | O_CREAT, 0666),
+ SyscallSucceeds());
+ EXPECT_THAT(close(dirfd), SyscallSucceeds());
+}
+
+TEST(CreateTest, HonorsUmask_NoRandomSave) {
+ const DisableSave ds; // file cannot be re-opened as writable.
+ TempUmask mask(0222);
+ int fd;
+ ASSERT_THAT(
+ fd = open(JoinPath(GetAbsoluteTestTmpdir(), "UmaskedFile").c_str(),
+ O_RDWR | O_CREAT, 0666),
+ SyscallSucceeds());
+ struct stat statbuf;
+ ASSERT_THAT(fstat(fd, &statbuf), SyscallSucceeds());
+ EXPECT_EQ(0444, statbuf.st_mode & 0777);
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+}
+
+TEST(CreateTest, CreateExclusively) {
+ std::string filename = NewTempAbsPath();
+
+ int fd;
+ ASSERT_THAT(fd = open(filename.c_str(), O_CREAT | O_RDWR, 0644),
+ SyscallSucceeds());
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+
+ EXPECT_THAT(open(filename.c_str(), O_CREAT | O_EXCL | O_RDWR, 0644),
+ SyscallFailsWithErrno(EEXIST));
+}
+
+TEST(CreateTest, CreateFailsOnUnpermittedDir) {
+ // Make sure we don't have CAP_DAC_OVERRIDE, since that allows the user to
+ // always override directory permissions.
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+ ASSERT_THAT(open("/foo", O_CREAT | O_RDWR, 0644),
+ SyscallFailsWithErrno(EACCES));
+}
+
+TEST(CreateTest, CreateFailsOnDirWithoutWritePerms) {
+ // Make sure we don't have CAP_DAC_OVERRIDE, since that allows the user to
+ // always override directory permissions.
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+ auto parent = ASSERT_NO_ERRNO_AND_VALUE(
+ TempPath::CreateDirWith(GetAbsoluteTestTmpdir(), 0555));
+ auto file = JoinPath(parent.path(), "foo");
+ ASSERT_THAT(open(file.c_str(), O_CREAT | O_RDWR, 0644),
+ SyscallFailsWithErrno(EACCES));
+}
+
+// A file originally created RW, but opened RO can later be opened RW.
+TEST(CreateTest, OpenCreateROThenRW) {
+ TempPath file(NewTempAbsPath());
+
+ // Create a RW file, but only open it RO.
+ FileDescriptor fd1 = ASSERT_NO_ERRNO_AND_VALUE(
+ Open(file.path(), O_CREAT | O_EXCL | O_RDONLY, 0644));
+
+ // Now get a RW FD.
+ FileDescriptor fd2 = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR));
+
+ // fd1 is not writable, but fd2 is.
+ char c = 'a';
+ EXPECT_THAT(WriteFd(fd1.get(), &c, 1), SyscallFailsWithErrno(EBADF));
+ EXPECT_THAT(WriteFd(fd2.get(), &c, 1), SyscallSucceedsWithValue(1));
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/partial_bad_buffer.cc b/test/syscalls/linux/partial_bad_buffer.cc
new file mode 100644
index 000000000..073a6b8c1
--- /dev/null
+++ b/test/syscalls/linux/partial_bad_buffer.cc
@@ -0,0 +1,305 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/uio.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "test/util/fs_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+using ::testing::Gt;
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+constexpr char kMessage[] = "hello world";
+
+// PartialBadBufferTest checks the result of various IO syscalls when passed a
+// buffer that does not have the space specified in the syscall (most of it is
+// PROT_NONE). Linux is annoyingly inconsistent among different syscalls, so we
+// test all of them.
+class PartialBadBufferTest : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ // Create and open a directory for getdents cases.
+ directory_ = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ ASSERT_THAT(
+ directory_fd_ = open(directory_.path().c_str(), O_RDONLY | O_DIRECTORY),
+ SyscallSucceeds());
+
+ // Create and open a normal file, placing it in the directory
+ // so the getdents cases have some dirents.
+ name_ = JoinPath(directory_.path(), "a");
+ ASSERT_THAT(fd_ = open(name_.c_str(), O_RDWR | O_CREAT, 0644),
+ SyscallSucceeds());
+
+ // Write some initial data.
+ size_t size = sizeof(kMessage) - 1;
+ EXPECT_THAT(WriteFd(fd_, &kMessage, size), SyscallSucceedsWithValue(size));
+
+ ASSERT_THAT(lseek(fd_, 0, SEEK_SET), SyscallSucceeds());
+
+ addr_ = mmap(0, 2 * kPageSize, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(addr_, MAP_FAILED);
+ char* buf = reinterpret_cast<char*>(addr_);
+
+ // Guard page for our read to run into.
+ ASSERT_THAT(mprotect(reinterpret_cast<void*>(buf + kPageSize), kPageSize,
+ PROT_NONE),
+ SyscallSucceeds());
+
+ // Leave only one free byte in the buffer.
+ bad_buffer_ = buf + kPageSize - 1;
+ }
+
+ void TearDown() override {
+ EXPECT_THAT(munmap(addr_, 2 * kPageSize), SyscallSucceeds()) << addr_;
+ EXPECT_THAT(close(fd_), SyscallSucceeds());
+ EXPECT_THAT(unlink(name_.c_str()), SyscallSucceeds());
+ EXPECT_THAT(close(directory_fd_), SyscallSucceeds());
+ }
+
+ // Return buffer with n bytes of free space.
+ // N.B. this is the same buffer used to back bad_buffer_.
+ char* FreeBytes(size_t n) {
+ TEST_CHECK(n <= static_cast<size_t>(4096));
+ return reinterpret_cast<char*>(addr_) + kPageSize - n;
+ }
+
+ std::string name_;
+ int fd_;
+ TempPath directory_;
+ int directory_fd_;
+ void* addr_;
+ char* bad_buffer_;
+};
+
+// We do both "big" and "small" tests to try to hit the "zero copy" and
+// non-"zero copy" paths, which have different code paths for handling faults.
+
+TEST_F(PartialBadBufferTest, ReadBig) {
+ EXPECT_THAT(RetryEINTR(read)(fd_, bad_buffer_, kPageSize),
+ SyscallSucceedsWithValue(1));
+ EXPECT_EQ('h', bad_buffer_[0]);
+}
+
+TEST_F(PartialBadBufferTest, ReadSmall) {
+ EXPECT_THAT(RetryEINTR(read)(fd_, bad_buffer_, 10),
+ SyscallSucceedsWithValue(1));
+ EXPECT_EQ('h', bad_buffer_[0]);
+}
+
+TEST_F(PartialBadBufferTest, PreadBig) {
+ EXPECT_THAT(RetryEINTR(pread)(fd_, bad_buffer_, kPageSize, 0),
+ SyscallSucceedsWithValue(1));
+ EXPECT_EQ('h', bad_buffer_[0]);
+}
+
+TEST_F(PartialBadBufferTest, PreadSmall) {
+ EXPECT_THAT(RetryEINTR(pread)(fd_, bad_buffer_, 10, 0),
+ SyscallSucceedsWithValue(1));
+ EXPECT_EQ('h', bad_buffer_[0]);
+}
+
+TEST_F(PartialBadBufferTest, ReadvBig) {
+ struct iovec vec;
+ vec.iov_base = bad_buffer_;
+ vec.iov_len = kPageSize;
+
+ EXPECT_THAT(RetryEINTR(readv)(fd_, &vec, 1), SyscallSucceedsWithValue(1));
+ EXPECT_EQ('h', bad_buffer_[0]);
+}
+
+TEST_F(PartialBadBufferTest, ReadvSmall) {
+ struct iovec vec;
+ vec.iov_base = bad_buffer_;
+ vec.iov_len = 10;
+
+ EXPECT_THAT(RetryEINTR(readv)(fd_, &vec, 1), SyscallSucceedsWithValue(1));
+ EXPECT_EQ('h', bad_buffer_[0]);
+}
+
+TEST_F(PartialBadBufferTest, PreadvBig) {
+ struct iovec vec;
+ vec.iov_base = bad_buffer_;
+ vec.iov_len = kPageSize;
+
+ EXPECT_THAT(RetryEINTR(preadv)(fd_, &vec, 1, 0), SyscallSucceedsWithValue(1));
+ EXPECT_EQ('h', bad_buffer_[0]);
+}
+
+TEST_F(PartialBadBufferTest, PreadvSmall) {
+ struct iovec vec;
+ vec.iov_base = bad_buffer_;
+ vec.iov_len = 10;
+
+ EXPECT_THAT(RetryEINTR(preadv)(fd_, &vec, 1, 0), SyscallSucceedsWithValue(1));
+ EXPECT_EQ('h', bad_buffer_[0]);
+}
+
+TEST_F(PartialBadBufferTest, WriteBig) {
+ // FIXME: The sentry write syscalls will return immediately
+ // if Access returns an error, but Access may not return an error
+ // and the sentry will instead perform a partial write.
+ SKIP_IF(IsRunningOnGvisor());
+
+ EXPECT_THAT(RetryEINTR(write)(fd_, bad_buffer_, kPageSize),
+ SyscallFailsWithErrno(EFAULT));
+}
+
+TEST_F(PartialBadBufferTest, WriteSmall) {
+ // FIXME: The sentry write syscalls will return immediately
+ // if Access returns an error, but Access may not return an error
+ // and the sentry will instead perform a partial write.
+ SKIP_IF(IsRunningOnGvisor());
+
+ EXPECT_THAT(RetryEINTR(write)(fd_, bad_buffer_, 10),
+ SyscallFailsWithErrno(EFAULT));
+}
+
+TEST_F(PartialBadBufferTest, PwriteBig) {
+ // FIXME: The sentry write syscalls will return immediately
+ // if Access returns an error, but Access may not return an error
+ // and the sentry will instead perform a partial write.
+ SKIP_IF(IsRunningOnGvisor());
+
+ EXPECT_THAT(RetryEINTR(pwrite)(fd_, bad_buffer_, kPageSize, 0),
+ SyscallFailsWithErrno(EFAULT));
+}
+
+TEST_F(PartialBadBufferTest, PwriteSmall) {
+ // FIXME: The sentry write syscalls will return immediately
+ // if Access returns an error, but Access may not return an error
+ // and the sentry will instead perform a partial write.
+ SKIP_IF(IsRunningOnGvisor());
+
+ EXPECT_THAT(RetryEINTR(pwrite)(fd_, bad_buffer_, 10, 0),
+ SyscallFailsWithErrno(EFAULT));
+}
+
+TEST_F(PartialBadBufferTest, WritevBig) {
+ // FIXME: The sentry write syscalls will return immediately
+ // if Access returns an error, but Access may not return an error
+ // and the sentry will instead perform a partial write.
+ SKIP_IF(IsRunningOnGvisor());
+
+ struct iovec vec;
+ vec.iov_base = bad_buffer_;
+ vec.iov_len = kPageSize;
+
+ EXPECT_THAT(RetryEINTR(writev)(fd_, &vec, 1), SyscallFailsWithErrno(EFAULT));
+}
+
+TEST_F(PartialBadBufferTest, WritevSmall) {
+ // FIXME: The sentry write syscalls will return immediately
+ // if Access returns an error, but Access may not return an error
+ // and the sentry will instead perform a partial write.
+ SKIP_IF(IsRunningOnGvisor());
+
+ struct iovec vec;
+ vec.iov_base = bad_buffer_;
+ vec.iov_len = 10;
+
+ EXPECT_THAT(RetryEINTR(writev)(fd_, &vec, 1), SyscallFailsWithErrno(EFAULT));
+}
+
+TEST_F(PartialBadBufferTest, PwritevBig) {
+ // FIXME: The sentry write syscalls will return immediately
+ // if Access returns an error, but Access may not return an error
+ // and the sentry will instead perform a partial write.
+ SKIP_IF(IsRunningOnGvisor());
+
+ struct iovec vec;
+ vec.iov_base = bad_buffer_;
+ vec.iov_len = kPageSize;
+
+ EXPECT_THAT(RetryEINTR(pwritev)(fd_, &vec, 1, 0),
+ SyscallFailsWithErrno(EFAULT));
+}
+
+TEST_F(PartialBadBufferTest, PwritevSmall) {
+ // FIXME: The sentry write syscalls will return immediately
+ // if Access returns an error, but Access may not return an error
+ // and the sentry will instead perform a partial write.
+ SKIP_IF(IsRunningOnGvisor());
+
+ struct iovec vec;
+ vec.iov_base = bad_buffer_;
+ vec.iov_len = 10;
+
+ EXPECT_THAT(RetryEINTR(pwritev)(fd_, &vec, 1, 0),
+ SyscallFailsWithErrno(EFAULT));
+}
+
+// getdents returns EFAULT when the you claim the buffer is large enough, but
+// it actually isn't.
+TEST_F(PartialBadBufferTest, GetdentsBig) {
+ EXPECT_THAT(RetryEINTR(syscall)(SYS_getdents64, directory_fd_, bad_buffer_,
+ kPageSize),
+ SyscallFailsWithErrno(EFAULT));
+}
+
+// getdents returns EINVAL when the you claim the buffer is too small.
+TEST_F(PartialBadBufferTest, GetdentsSmall) {
+ EXPECT_THAT(
+ RetryEINTR(syscall)(SYS_getdents64, directory_fd_, bad_buffer_, 10),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+// getdents will write entries into a buffer if there is space before it faults.
+TEST_F(PartialBadBufferTest, GetdentsOneEntry) {
+ // 30 bytes is enough for one (small) entry.
+ char* buf = FreeBytes(30);
+
+ EXPECT_THAT(
+ RetryEINTR(syscall)(SYS_getdents64, directory_fd_, buf, kPageSize),
+ SyscallSucceedsWithValue(Gt(0)));
+}
+
+// Verify that when write returns EFAULT the kernel hasn't silently written
+// the initial valid bytes.
+TEST_F(PartialBadBufferTest, WriteEfaultIsntPartial) {
+ // FIXME: The sentry write syscalls will return immediately
+ // if Access returns an error, but Access may not return an error
+ // and the sentry will instead perform a partial write.
+ SKIP_IF(IsRunningOnGvisor());
+
+ bad_buffer_[0] = 'A';
+ EXPECT_THAT(RetryEINTR(write)(fd_, bad_buffer_, 10),
+ SyscallFailsWithErrno(EFAULT));
+
+ size_t size = 255;
+ char buf[255];
+ memset(buf, 0, size);
+
+ EXPECT_THAT(RetryEINTR(pread)(fd_, buf, size, 0),
+ SyscallSucceedsWithValue(sizeof(kMessage) - 1));
+
+ // 'A' has not been written.
+ EXPECT_STREQ(buf, kMessage);
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/pause.cc b/test/syscalls/linux/pause.cc
new file mode 100644
index 000000000..4e1148c24
--- /dev/null
+++ b/test/syscalls/linux/pause.cc
@@ -0,0 +1,88 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <signal.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <atomic>
+
+#include "gtest/gtest.h"
+#include "absl/synchronization/mutex.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+void NoopSignalHandler(int sig, siginfo_t* info, void* context) {}
+
+} // namespace
+
+TEST(PauseTest, OnlyReturnsWhenSignalHandled) {
+ struct sigaction sa;
+ sigfillset(&sa.sa_mask);
+
+ // Ensure that SIGUSR1 is ignored.
+ sa.sa_handler = SIG_IGN;
+ ASSERT_THAT(sigaction(SIGUSR1, &sa, nullptr), SyscallSucceeds());
+
+ // Register a handler for SIGUSR2.
+ sa.sa_sigaction = NoopSignalHandler;
+ sa.sa_flags = SA_SIGINFO;
+ ASSERT_THAT(sigaction(SIGUSR2, &sa, nullptr), SyscallSucceeds());
+
+ // The child sets their own tid.
+ absl::Mutex mu;
+ pid_t child_tid = 0;
+ bool child_tid_available = false;
+ std::atomic<int> sent_signal{0};
+ std::atomic<int> waking_signal{0};
+ ScopedThread t([&] {
+ mu.Lock();
+ child_tid = gettid();
+ child_tid_available = true;
+ mu.Unlock();
+ EXPECT_THAT(pause(), SyscallFailsWithErrno(EINTR));
+ waking_signal.store(sent_signal.load());
+ });
+ mu.Lock();
+ mu.Await(absl::Condition(&child_tid_available));
+ mu.Unlock();
+
+ // Wait a bit to let the child enter pause().
+ absl::SleepFor(absl::Seconds(3));
+
+ // The child should not be woken by SIGUSR1.
+ sent_signal.store(SIGUSR1);
+ ASSERT_THAT(tgkill(getpid(), child_tid, SIGUSR1), SyscallSucceeds());
+ absl::SleepFor(absl::Seconds(3));
+
+ // The child should be woken by SIGUSR2.
+ sent_signal.store(SIGUSR2);
+ ASSERT_THAT(tgkill(getpid(), child_tid, SIGUSR2), SyscallSucceeds());
+ absl::SleepFor(absl::Seconds(3));
+
+ EXPECT_EQ(SIGUSR2, waking_signal.load());
+}
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/pipe.cc b/test/syscalls/linux/pipe.cc
new file mode 100644
index 000000000..4731157e8
--- /dev/null
+++ b/test/syscalls/linux/pipe.cc
@@ -0,0 +1,480 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h> /* Obtain O_* constant definitions */
+#include <sys/ioctl.h>
+#include <sys/uio.h>
+#include <unistd.h>
+
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "absl/strings/str_cat.h"
+#include "absl/synchronization/notification.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Buffer size of a pipe.
+//
+// TODO: Get this from F_GETPIPE_SZ.
+constexpr int kPipeSize = 65536;
+
+class PipeTest : public ::testing::Test {
+ public:
+ static void SetUpTestCase() {
+ // Tests intentionally generate SIGPIPE.
+ TEST_PCHECK(signal(SIGPIPE, SIG_IGN) != SIG_ERR);
+ }
+
+ static void TearDownTestCase() {
+ TEST_PCHECK(signal(SIGPIPE, SIG_DFL) != SIG_ERR);
+ }
+};
+
+TEST_F(PipeTest, Basic) {
+ // fds[0] is read end, fds[1] is write end.
+ int fds[2];
+ int i = 0x12345678;
+ ASSERT_THAT(pipe(fds), SyscallSucceeds());
+
+ // Ensure that the inode number is the same for each end.
+ struct stat rst;
+ ASSERT_THAT(fstat(fds[0], &rst), SyscallSucceeds());
+ struct stat wst;
+ ASSERT_THAT(fstat(fds[1], &wst), SyscallSucceeds());
+ EXPECT_EQ(rst.st_ino, wst.st_ino);
+
+ ASSERT_THAT(write(fds[0], &i, sizeof(i)), SyscallFailsWithErrno(EBADF));
+ ASSERT_THAT(read(fds[1], &i, sizeof(i)), SyscallFailsWithErrno(EBADF));
+
+ ASSERT_THAT(write(fds[1], &i, sizeof(i)),
+ SyscallSucceedsWithValue(sizeof(i)));
+ int j;
+ ASSERT_THAT(read(fds[0], &j, sizeof(j)), SyscallSucceedsWithValue(sizeof(j)));
+ EXPECT_EQ(i, j);
+
+ ASSERT_THAT(fcntl(fds[0], F_GETFL), SyscallSucceeds());
+ ASSERT_THAT(fcntl(fds[1], F_GETFL), SyscallSucceedsWithValue(O_WRONLY));
+
+ ASSERT_THAT(close(fds[0]), SyscallSucceeds());
+ ASSERT_THAT(close(fds[1]), SyscallSucceeds());
+}
+
+TEST_F(PipeTest, BasicCloExec) {
+ // fds[0] is read end, fds[1] is write end.
+ int fds[2];
+ int i = 0x12345678;
+ ASSERT_THAT(pipe2(fds, O_CLOEXEC), SyscallSucceeds());
+
+ ASSERT_THAT(write(fds[0], &i, sizeof(i)), SyscallFailsWithErrno(EBADF));
+ ASSERT_THAT(read(fds[1], &i, sizeof(i)), SyscallFailsWithErrno(EBADF));
+
+ ASSERT_THAT(write(fds[1], &i, sizeof(i)),
+ SyscallSucceedsWithValue(sizeof(i)));
+ int j;
+ ASSERT_THAT(read(fds[0], &j, sizeof(j)), SyscallSucceedsWithValue(sizeof(j)));
+ EXPECT_EQ(i, j);
+
+ ASSERT_THAT(fcntl(fds[0], F_GETFL), SyscallSucceeds());
+ ASSERT_THAT(fcntl(fds[1], F_GETFL), SyscallSucceeds());
+
+ ASSERT_THAT(close(fds[0]), SyscallSucceeds());
+ ASSERT_THAT(close(fds[1]), SyscallSucceeds());
+}
+
+TEST_F(PipeTest, BasicNoBlock) {
+ // fds[0] is read end, fds[1] is write end.
+ int fds[2];
+ int i = 0x12345678;
+ ASSERT_THAT(pipe2(fds, O_NONBLOCK), SyscallSucceeds());
+
+ ASSERT_THAT(write(fds[0], &i, sizeof(i)), SyscallFailsWithErrno(EBADF));
+ ASSERT_THAT(read(fds[1], &i, sizeof(i)), SyscallFailsWithErrno(EBADF));
+
+ ASSERT_THAT(read(fds[0], &i, sizeof(i)), SyscallFailsWithErrno(EWOULDBLOCK));
+ ASSERT_THAT(write(fds[1], &i, sizeof(i)),
+ SyscallSucceedsWithValue(sizeof(i)));
+ int j;
+ ASSERT_THAT(read(fds[0], &j, sizeof(j)), SyscallSucceedsWithValue(sizeof(j)));
+ EXPECT_EQ(i, j);
+ ASSERT_THAT(read(fds[0], &i, sizeof(i)), SyscallFailsWithErrno(EWOULDBLOCK));
+
+ ASSERT_THAT(fcntl(fds[0], F_GETFL), SyscallSucceedsWithValue(O_NONBLOCK));
+ ASSERT_THAT(fcntl(fds[1], F_GETFL),
+ SyscallSucceedsWithValue(O_NONBLOCK | O_WRONLY));
+
+ ASSERT_THAT(close(fds[0]), SyscallSucceeds());
+ ASSERT_THAT(close(fds[1]), SyscallSucceeds());
+}
+
+TEST_F(PipeTest, BasicBothOptions) {
+ // fds[0] is read end, fds[1] is write end.
+ int fds[2];
+ int i = 0x12345678;
+ ASSERT_THAT(pipe2(fds, O_NONBLOCK | O_CLOEXEC), SyscallSucceeds());
+
+ ASSERT_THAT(write(fds[0], &i, sizeof(i)), SyscallFailsWithErrno(EBADF));
+ ASSERT_THAT(read(fds[1], &i, sizeof(i)), SyscallFailsWithErrno(EBADF));
+
+ ASSERT_THAT(read(fds[0], &i, sizeof(i)), SyscallFailsWithErrno(EWOULDBLOCK));
+ ASSERT_THAT(write(fds[1], &i, sizeof(i)),
+ SyscallSucceedsWithValue(sizeof(i)));
+ int j;
+ ASSERT_THAT(read(fds[0], &j, sizeof(j)), SyscallSucceedsWithValue(sizeof(j)));
+ EXPECT_EQ(i, j);
+ ASSERT_THAT(read(fds[0], &i, sizeof(i)), SyscallFailsWithErrno(EWOULDBLOCK));
+
+ ASSERT_THAT(fcntl(fds[0], F_GETFL), SyscallSucceedsWithValue(O_NONBLOCK));
+ ASSERT_THAT(fcntl(fds[1], F_GETFL),
+ SyscallSucceedsWithValue(O_NONBLOCK | O_WRONLY));
+
+ ASSERT_THAT(close(fds[0]), SyscallSucceeds());
+ ASSERT_THAT(close(fds[1]), SyscallSucceeds());
+}
+
+TEST_F(PipeTest, BasicBadOptions) {
+ int fds[2];
+ ASSERT_THAT(pipe2(fds, 0xDEAD), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_F(PipeTest, Seek) {
+ // fds[0] is read end, fds[1] is write end.
+ int fds[2];
+ int i = 0x12345678;
+ ASSERT_THAT(pipe(fds), SyscallSucceeds());
+
+ ASSERT_THAT(lseek(fds[0], 0, SEEK_CUR), SyscallFailsWithErrno(ESPIPE));
+ ASSERT_THAT(lseek(fds[1], 0, SEEK_CUR), SyscallFailsWithErrno(ESPIPE));
+ ASSERT_THAT(lseek(fds[0], 0, SEEK_SET), SyscallFailsWithErrno(ESPIPE));
+ ASSERT_THAT(lseek(fds[0], 4, SEEK_SET), SyscallFailsWithErrno(ESPIPE));
+ ASSERT_THAT(lseek(fds[1], 0, SEEK_SET), SyscallFailsWithErrno(ESPIPE));
+ ASSERT_THAT(lseek(fds[1], 4, SEEK_SET), SyscallFailsWithErrno(ESPIPE));
+
+ ASSERT_THAT(lseek(fds[0], 0, SEEK_CUR), SyscallFailsWithErrno(ESPIPE));
+ ASSERT_THAT(lseek(fds[0], 4, SEEK_CUR), SyscallFailsWithErrno(ESPIPE));
+ ASSERT_THAT(lseek(fds[1], 0, SEEK_CUR), SyscallFailsWithErrno(ESPIPE));
+ ASSERT_THAT(lseek(fds[1], 4, SEEK_CUR), SyscallFailsWithErrno(ESPIPE));
+
+ ASSERT_THAT(write(fds[1], &i, sizeof(i)),
+ SyscallSucceedsWithValue(sizeof(i)));
+ int j;
+
+ ASSERT_THAT(lseek(fds[0], 0, SEEK_SET), SyscallFailsWithErrno(ESPIPE));
+ ASSERT_THAT(lseek(fds[0], 4, SEEK_SET), SyscallFailsWithErrno(ESPIPE));
+ ASSERT_THAT(lseek(fds[1], 0, SEEK_SET), SyscallFailsWithErrno(ESPIPE));
+ ASSERT_THAT(lseek(fds[1], 4, SEEK_SET), SyscallFailsWithErrno(ESPIPE));
+
+ ASSERT_THAT(lseek(fds[0], 0, SEEK_CUR), SyscallFailsWithErrno(ESPIPE));
+ ASSERT_THAT(lseek(fds[0], 4, SEEK_CUR), SyscallFailsWithErrno(ESPIPE));
+ ASSERT_THAT(lseek(fds[1], 0, SEEK_CUR), SyscallFailsWithErrno(ESPIPE));
+ ASSERT_THAT(lseek(fds[1], 4, SEEK_CUR), SyscallFailsWithErrno(ESPIPE));
+
+ ASSERT_THAT(read(fds[0], &j, sizeof(j)), SyscallSucceedsWithValue(sizeof(j)));
+ EXPECT_EQ(i, j);
+
+ ASSERT_THAT(fcntl(fds[0], F_GETFL), SyscallSucceeds());
+ ASSERT_THAT(fcntl(fds[1], F_GETFL), SyscallSucceedsWithValue(O_WRONLY));
+
+ ASSERT_THAT(close(fds[0]), SyscallSucceeds());
+ ASSERT_THAT(close(fds[1]), SyscallSucceeds());
+}
+
+TEST_F(PipeTest, AbsoluteOffsetSyscallsFail) {
+ // Syscalls for IO at absolute offsets fail because pipes are not seekable.
+ int fds[2];
+ ASSERT_THAT(pipe(fds), SyscallSucceeds());
+
+ std::vector<char> buf(4096);
+ struct iovec iov;
+
+ EXPECT_THAT(pread(fds[1], buf.data(), buf.size(), 0),
+ SyscallFailsWithErrno(ESPIPE));
+ EXPECT_THAT(pwrite(fds[0], buf.data(), buf.size(), 0),
+ SyscallFailsWithErrno(ESPIPE));
+ EXPECT_THAT(preadv(fds[1], &iov, 1, 0), SyscallFailsWithErrno(ESPIPE));
+ EXPECT_THAT(pwritev(fds[0], &iov, 1, 0), SyscallFailsWithErrno(ESPIPE));
+
+ EXPECT_THAT(close(fds[0]), SyscallSucceeds());
+ EXPECT_THAT(close(fds[1]), SyscallSucceeds());
+}
+
+TEST_F(PipeTest, WriterSideCloses) {
+ int fds[2];
+ ASSERT_THAT(pipe(fds), SyscallSucceeds());
+ int rfd = fds[0];
+ int i = 123;
+ ScopedThread t([rfd]() {
+ int j;
+ ASSERT_THAT(read(rfd, &j, sizeof(j)), SyscallSucceedsWithValue(sizeof(j)));
+ // This will return when the close() completes.
+ ASSERT_THAT(read(rfd, &j, sizeof(j)), SyscallSucceeds());
+ // This will return straight away.
+ ASSERT_THAT(read(rfd, &j, sizeof(j)), SyscallSucceeds());
+ });
+ // Sleep a bit so the thread can block.
+ absl::SleepFor(absl::Seconds(1.0));
+ ASSERT_THAT(write(fds[1], &i, sizeof(i)),
+ SyscallSucceedsWithValue(sizeof(i)));
+ // Sleep a bit so the thread can block again.
+ absl::SleepFor(absl::Seconds(3.0));
+ ASSERT_THAT(close(fds[1]), SyscallSucceeds());
+ t.Join();
+
+ ASSERT_THAT(close(fds[0]), SyscallSucceeds());
+}
+
+TEST_F(PipeTest, WriterSideClosesReadDataFirst) {
+ int fds[2];
+ ASSERT_THAT(pipe(fds), SyscallSucceeds());
+ int i = 123;
+ ASSERT_THAT(write(fds[1], &i, sizeof(i)),
+ SyscallSucceedsWithValue(sizeof(i)));
+ ASSERT_THAT(close(fds[1]), SyscallSucceeds());
+ int j;
+ ASSERT_THAT(read(fds[0], &j, sizeof(j)), SyscallSucceedsWithValue(sizeof(j)));
+ ASSERT_EQ(j, i);
+ ASSERT_THAT(read(fds[0], &j, sizeof(j)), SyscallSucceeds());
+
+ ASSERT_THAT(close(fds[0]), SyscallSucceeds());
+}
+
+TEST_F(PipeTest, ReaderSideCloses) {
+ int fds[2];
+ ASSERT_THAT(pipe(fds), SyscallSucceeds());
+ ASSERT_THAT(close(fds[0]), SyscallSucceeds());
+ int i = 123;
+ ASSERT_THAT(write(fds[1], &i, sizeof(i)), SyscallFailsWithErrno(EPIPE));
+
+ ASSERT_THAT(close(fds[1]), SyscallSucceeds());
+}
+
+TEST_F(PipeTest, CloseTwice) {
+ int fds[2];
+ ASSERT_THAT(pipe(fds), SyscallSucceeds());
+ ASSERT_THAT(close(fds[0]), SyscallSucceeds());
+ ASSERT_THAT(close(fds[1]), SyscallSucceeds());
+ ASSERT_THAT(close(fds[0]), SyscallFailsWithErrno(EBADF));
+ ASSERT_THAT(close(fds[1]), SyscallFailsWithErrno(EBADF));
+
+ ASSERT_THAT(pipe(fds), SyscallSucceeds());
+ ASSERT_THAT(close(fds[1]), SyscallSucceeds());
+ ASSERT_THAT(close(fds[0]), SyscallSucceeds());
+ ASSERT_THAT(close(fds[0]), SyscallFailsWithErrno(EBADF));
+ ASSERT_THAT(close(fds[1]), SyscallFailsWithErrno(EBADF));
+}
+
+// Blocking write returns EPIPE when read end is closed if nothing has been
+// written.
+TEST_F(PipeTest, BlockWriteClosed) {
+ int fds[2];
+ ASSERT_THAT(pipe(fds), SyscallSucceeds());
+ int wfd = fds[1];
+
+ absl::Notification notify;
+ ScopedThread t([wfd, &notify]() {
+ std::vector<char> buf(kPipeSize);
+ // Exactly fill the pipe buffer.
+ ASSERT_THAT(WriteFd(wfd, buf.data(), buf.size()),
+ SyscallSucceedsWithValue(buf.size()));
+
+ notify.Notify();
+
+ // Attempt to write one more byte. Blocks.
+ // N.B. Don't use WriteFd, we don't want a retry.
+ ASSERT_THAT(write(wfd, buf.data(), 1), SyscallFailsWithErrno(EPIPE));
+ });
+
+ notify.WaitForNotification();
+ absl::SleepFor(absl::Seconds(1.0));
+ ASSERT_THAT(close(fds[0]), SyscallSucceeds());
+
+ t.Join();
+
+ ASSERT_THAT(close(fds[1]), SyscallSucceeds());
+}
+
+// Blocking write returns EPIPE when read end is closed even if something has
+// been written.
+//
+// FIXME: Pipe writes blocking early allows S/R to interrupt the
+// write(2) call before the buffer is full. Then the next call will will return
+// non-zero instead of EPIPE.
+TEST_F(PipeTest, BlockPartialWriteClosed_NoRandomSave) {
+ int fds[2];
+ ASSERT_THAT(pipe(fds), SyscallSucceeds());
+ int wfd = fds[1];
+
+ ScopedThread t([wfd]() {
+ std::vector<char> buf(2 * kPipeSize);
+ // Write more than fits in the buffer. Blocks then returns partial write
+ // when the other end is closed. The next call returns EPIPE.
+ if (IsRunningOnGvisor()) {
+ // FIXME: Pipe writes block early on gVisor, resulting in a
+ // shorter than expected partial write.
+ ASSERT_THAT(write(wfd, buf.data(), buf.size()),
+ SyscallSucceedsWithValue(::testing::Gt(0)));
+ } else {
+ ASSERT_THAT(write(wfd, buf.data(), buf.size()),
+ SyscallSucceedsWithValue(kPipeSize));
+ }
+ ASSERT_THAT(write(wfd, buf.data(), buf.size()),
+ SyscallFailsWithErrno(EPIPE));
+ });
+
+ // Leave time for write to become blocked.
+ absl::SleepFor(absl::Seconds(1.0));
+
+ ASSERT_THAT(close(fds[0]), SyscallSucceeds());
+
+ t.Join();
+
+ ASSERT_THAT(close(fds[1]), SyscallSucceeds());
+}
+
+TEST_F(PipeTest, ReadFromClosedFd_NoRandomSave) {
+ int fds[2];
+ ASSERT_THAT(pipe(fds), SyscallSucceeds());
+ int rfd = fds[0];
+ absl::Notification notify;
+ ScopedThread t([rfd, &notify]() {
+ int f;
+ notify.Notify();
+ ASSERT_THAT(read(rfd, &f, sizeof(f)), SyscallSucceedsWithValue(sizeof(f)));
+ ASSERT_EQ(123, f);
+ });
+ notify.WaitForNotification();
+ // Make sure that the thread gets to read().
+ absl::SleepFor(absl::Seconds(5.0));
+ {
+ // We cannot save/restore here as the read end of pipe is closed but there
+ // is ongoing read() above. We will not be able to restart the read()
+ // successfully in restore run since the read fd is closed.
+ const DisableSave ds;
+ ASSERT_THAT(close(fds[0]), SyscallSucceeds());
+ int i = 123;
+ ASSERT_THAT(write(fds[1], &i, sizeof(i)),
+ SyscallSucceedsWithValue(sizeof(i)));
+ t.Join();
+ }
+ ASSERT_THAT(close(fds[1]), SyscallSucceeds());
+}
+
+TEST_F(PipeTest, FionRead) {
+ // fds[0] is read end, fds[1] is write end.
+ int fds[2];
+ int data[2] = {0x12345678, 0x9101112};
+ ASSERT_THAT(pipe(fds), SyscallSucceeds());
+
+ int n = -1;
+ EXPECT_THAT(ioctl(fds[0], FIONREAD, &n), SyscallSucceedsWithValue(0));
+ EXPECT_EQ(n, 0);
+ n = -1;
+ EXPECT_THAT(ioctl(fds[1], FIONREAD, &n), SyscallSucceedsWithValue(0));
+ EXPECT_EQ(n, 0);
+
+ EXPECT_THAT(write(fds[1], data, sizeof(data)),
+ SyscallSucceedsWithValue(sizeof(data)));
+
+ n = -1;
+ EXPECT_THAT(ioctl(fds[0], FIONREAD, &n), SyscallSucceedsWithValue(0));
+ EXPECT_EQ(n, sizeof(data));
+ n = -1;
+ EXPECT_THAT(ioctl(fds[1], FIONREAD, &n), SyscallSucceedsWithValue(0));
+ EXPECT_EQ(n, sizeof(data));
+}
+
+// Test that opening an empty anonymous pipe RDONLY via /proc/self/fd/N does not
+// block waiting for a writer.
+TEST_F(PipeTest, OpenViaProcSelfFD) {
+ int fds[2];
+ ASSERT_THAT(pipe(fds), SyscallSucceeds());
+ FileDescriptor rfd(fds[0]);
+ FileDescriptor wfd(fds[1]);
+
+ // Close the write end of the pipe.
+ wfd.release();
+
+ // Open other side via /proc/self/fd. It should not block.
+ FileDescriptor proc_self_fd = ASSERT_NO_ERRNO_AND_VALUE(
+ Open(absl::StrCat("/proc/self/fd/", fds[0]), O_RDONLY));
+}
+
+// Test that opening and reading from an anonymous pipe (with existing writes)
+// RDONLY via /proc/self/fd/N returns the existing data.
+TEST_F(PipeTest, OpenViaProcSelfFDWithWrites) {
+ int fds[2];
+ ASSERT_THAT(pipe(fds), SyscallSucceeds());
+ FileDescriptor rfd(fds[0]);
+ FileDescriptor wfd(fds[1]);
+
+ // Write to the pipe and then close the write fd.
+ char data = 'x';
+ ASSERT_THAT(write(fds[1], &data, 1), SyscallSucceedsWithValue(1));
+ wfd.release();
+
+ // Open read side via /proc/self/fd, and read from it.
+ FileDescriptor proc_self_fd = ASSERT_NO_ERRNO_AND_VALUE(
+ Open(absl::StrCat("/proc/self/fd/", fds[0]), O_RDONLY));
+ char got;
+ ASSERT_THAT(read(proc_self_fd.get(), &got, 1), SyscallSucceedsWithValue(1));
+
+ // We should get what we sent.
+ EXPECT_EQ(got, data);
+}
+
+TEST_F(PipeTest, LargeFile) {
+ int fds[2];
+ ASSERT_THAT(pipe(fds), SyscallSucceeds());
+ FileDescriptor rfd(fds[0]);
+ FileDescriptor wfd(fds[1]);
+
+ int rflags;
+ EXPECT_THAT(rflags = fcntl(rfd.get(), F_GETFL), SyscallSucceeds());
+
+ // The kernel did *not* set O_LARGEFILE.
+ EXPECT_EQ(rflags, 0);
+}
+
+// Test that accessing /proc/<PID>/fd/<FD> correctly decrements the refcount of
+// that file descriptor.
+TEST_F(PipeTest, ProcFDReleasesFile) {
+ int fds[2];
+ ASSERT_THAT(pipe(fds), SyscallSucceeds());
+ FileDescriptor rfd(fds[0]);
+ FileDescriptor wfd(fds[1]);
+
+ // Stat the pipe FD, which shouldn't alter the refcount of the write end of
+ // the pipe.
+ struct stat wst;
+ ASSERT_THAT(lstat(absl::StrCat("/proc/self/fd/", wfd.get()).c_str(), &wst),
+ SyscallSucceeds());
+
+ // Close the write end of the pipe and ensure that read indicates EOF.
+ wfd.reset();
+ char buf;
+ ASSERT_THAT(read(rfd.get(), &buf, 1), SyscallSucceedsWithValue(0));
+}
+
+} // namespace
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/poll.cc b/test/syscalls/linux/poll.cc
new file mode 100644
index 000000000..897fd0bec
--- /dev/null
+++ b/test/syscalls/linux/poll.cc
@@ -0,0 +1,279 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <poll.h>
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <algorithm>
+
+#include "gtest/gtest.h"
+#include "absl/synchronization/notification.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/syscalls/linux/base_poll_test.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/logging.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+class PollTest : public BasePollTest {
+ protected:
+ void SetUp() override { BasePollTest::SetUp(); }
+ void TearDown() override { BasePollTest::TearDown(); }
+};
+
+TEST_F(PollTest, InvalidFds) {
+ // fds is invalid because it's null, but we tell ppoll the length is non-zero.
+ EXPECT_THAT(poll(nullptr, 1, 1), SyscallFailsWithErrno(EFAULT));
+ EXPECT_THAT(poll(nullptr, -1, 1), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_F(PollTest, NullFds) {
+ EXPECT_THAT(poll(nullptr, 0, 10), SyscallSucceeds());
+}
+
+TEST_F(PollTest, ZeroTimeout) {
+ EXPECT_THAT(poll(nullptr, 0, 0), SyscallSucceeds());
+}
+
+// If random S/R interrupts the poll, SIGALRM may be delivered before poll
+// restarts, causing the poll to hang forever.
+TEST_F(PollTest, NegativeTimeout_NoRandomSave) {
+ // Negative timeout mean wait forever so set a timer.
+ SetTimer(absl::Milliseconds(100));
+ EXPECT_THAT(poll(nullptr, 0, -1), SyscallFailsWithErrno(EINTR));
+ EXPECT_TRUE(TimerFired());
+}
+
+TEST_F(PollTest, NonBlockingEventPOLLIN) {
+ // Create a pipe.
+ int fds[2];
+ ASSERT_THAT(pipe(fds), SyscallSucceeds());
+
+ FileDescriptor fd0(fds[0]);
+ FileDescriptor fd1(fds[1]);
+
+ // Write some data to the pipe.
+ char s[] = "foo\n";
+ ASSERT_THAT(WriteFd(fd1.get(), s, strlen(s) + 1), SyscallSucceeds());
+
+ // Poll on the reader fd with POLLIN event.
+ struct pollfd poll_fd = {fd0.get(), POLLIN, 0};
+ EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 0), SyscallSucceedsWithValue(1));
+
+ // Should trigger POLLIN event.
+ EXPECT_EQ(poll_fd.revents & POLLIN, POLLIN);
+}
+
+TEST_F(PollTest, BlockingEventPOLLIN) {
+ // Create a pipe.
+ int fds[2];
+ ASSERT_THAT(pipe(fds), SyscallSucceeds());
+
+ FileDescriptor fd0(fds[0]);
+ FileDescriptor fd1(fds[1]);
+
+ // Start a blocking poll on the read fd.
+ absl::Notification notify;
+ ScopedThread t([&fd0, &notify]() {
+ notify.Notify();
+
+ // Poll on the reader fd with POLLIN event.
+ struct pollfd poll_fd = {fd0.get(), POLLIN, 0};
+ EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, -1), SyscallSucceedsWithValue(1));
+
+ // Should trigger POLLIN event.
+ EXPECT_EQ(poll_fd.revents & POLLIN, POLLIN);
+ });
+
+ notify.WaitForNotification();
+ absl::SleepFor(absl::Seconds(1.0));
+
+ // Write some data to the pipe.
+ char s[] = "foo\n";
+ ASSERT_THAT(WriteFd(fd1.get(), s, strlen(s) + 1), SyscallSucceeds());
+}
+
+TEST_F(PollTest, NonBlockingEventPOLLHUP) {
+ // Create a pipe.
+ int fds[2];
+ ASSERT_THAT(pipe(fds), SyscallSucceeds());
+
+ FileDescriptor fd0(fds[0]);
+ FileDescriptor fd1(fds[1]);
+
+ // Close the writer fd.
+ fd1.reset();
+
+ // Poll on the reader fd with POLLIN event.
+ struct pollfd poll_fd = {fd0.get(), POLLIN, 0};
+ EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 0), SyscallSucceedsWithValue(1));
+
+ // Should trigger POLLHUP event.
+ EXPECT_EQ(poll_fd.revents & POLLHUP, POLLHUP);
+
+ // Should not trigger POLLIN event.
+ EXPECT_EQ(poll_fd.revents & POLLIN, 0);
+}
+
+TEST_F(PollTest, BlockingEventPOLLHUP) {
+ // Create a pipe.
+ int fds[2];
+ ASSERT_THAT(pipe(fds), SyscallSucceeds());
+
+ FileDescriptor fd0(fds[0]);
+ FileDescriptor fd1(fds[1]);
+
+ // Start a blocking poll on the read fd.
+ absl::Notification notify;
+ ScopedThread t([&fd0, &notify]() {
+ notify.Notify();
+
+ // Poll on the reader fd with POLLIN event.
+ struct pollfd poll_fd = {fd0.get(), POLLIN, 0};
+ EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, -1), SyscallSucceedsWithValue(1));
+
+ // Should trigger POLLHUP event.
+ EXPECT_EQ(poll_fd.revents & POLLHUP, POLLHUP);
+
+ // Should not trigger POLLIN event.
+ EXPECT_EQ(poll_fd.revents & POLLIN, 0);
+ });
+
+ notify.WaitForNotification();
+ absl::SleepFor(absl::Seconds(1.0));
+
+ // Write some data and close the writer fd.
+ fd1.reset();
+}
+
+TEST_F(PollTest, NonBlockingEventPOLLERR) {
+ // Create a pipe.
+ int fds[2];
+ ASSERT_THAT(pipe(fds), SyscallSucceeds());
+
+ FileDescriptor fd0(fds[0]);
+ FileDescriptor fd1(fds[1]);
+
+ // Close the reader fd.
+ fd0.reset();
+
+ // Poll on the writer fd with POLLOUT event.
+ struct pollfd poll_fd = {fd1.get(), POLLOUT, 0};
+ EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 0), SyscallSucceedsWithValue(1));
+
+ // Should trigger POLLERR event.
+ EXPECT_EQ(poll_fd.revents & POLLERR, POLLERR);
+
+ // Should also trigger POLLOUT event.
+ EXPECT_EQ(poll_fd.revents & POLLOUT, POLLOUT);
+}
+
+// This test will validate that if an FD is already ready on some event, whether
+// it's POLLIN or POLLOUT it will not immediately return unless that's actually
+// what the caller was interested in.
+TEST_F(PollTest, ImmediatelyReturnOnlyOnPollEvents) {
+ // Create a pipe.
+ int fds[2];
+ ASSERT_THAT(pipe(fds), SyscallSucceeds());
+
+ FileDescriptor fd0(fds[0]);
+ FileDescriptor fd1(fds[1]);
+
+ // Wait for read related event on the write side of the pipe, since a write
+ // is possible on fds[1] it would mean that POLLOUT would return immediately.
+ // We should make sure that we're not woken up with that state that we didn't
+ // specificially request.
+ constexpr int kTimeoutMs = 100;
+ struct pollfd poll_fd = {fd1.get(), POLLIN | POLLPRI | POLLRDHUP, 0};
+ EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, kTimeoutMs),
+ SyscallSucceedsWithValue(0)); // We should timeout.
+ EXPECT_EQ(poll_fd.revents, 0); // Nothing should be in returned events.
+
+ // Now let's poll on POLLOUT and we should get back 1 fd as being ready and
+ // it should contain POLLOUT in the revents.
+ poll_fd.events = POLLOUT;
+ EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, kTimeoutMs),
+ SyscallSucceedsWithValue(1)); // 1 fd should have an event.
+ EXPECT_EQ(poll_fd.revents, POLLOUT); // POLLOUT should be in revents.
+}
+
+// This test validates that poll(2) while data is available immediately returns.
+TEST_F(PollTest, PollLevelTriggered) {
+ int fds[2] = {};
+ ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, /*protocol=*/0, fds),
+ SyscallSucceeds());
+
+ FileDescriptor fd0(fds[0]);
+ FileDescriptor fd1(fds[1]);
+
+ // Write two bytes to the socket.
+ const char* kBuf = "aa";
+ ASSERT_THAT(RetryEINTR(send)(fd0.get(), kBuf, /*len=*/2, /*flags=*/0),
+ SyscallSucceedsWithValue(2)); // 2 bytes should be written.
+
+ // Poll(2) should immediately return as there is data available to read.
+ constexpr int kInfiniteTimeout = -1;
+ struct pollfd poll_fd = {fd1.get(), POLLIN, 0};
+ ASSERT_THAT(RetryEINTR(poll)(&poll_fd, /*nfds=*/1, kInfiniteTimeout),
+ SyscallSucceedsWithValue(1)); // 1 fd should be ready to read.
+ EXPECT_NE(poll_fd.revents & POLLIN, 0);
+
+ // Read a single byte.
+ char read_byte = 0;
+ ASSERT_THAT(RetryEINTR(recv)(fd1.get(), &read_byte, /*len=*/1, /*flags=*/0),
+ SyscallSucceedsWithValue(1)); // 1 byte should be read.
+ ASSERT_EQ(read_byte, 'a'); // We should have read a single 'a'.
+
+ // Create a separate pollfd for our second poll.
+ struct pollfd poll_fd_after = {fd1.get(), POLLIN, 0};
+
+ // Poll(2) should again immediately return since we only read one byte.
+ ASSERT_THAT(RetryEINTR(poll)(&poll_fd_after, /*nfds=*/1, kInfiniteTimeout),
+ SyscallSucceedsWithValue(1)); // 1 fd should be ready to read.
+ EXPECT_NE(poll_fd_after.revents & POLLIN, 0);
+}
+
+TEST_F(PollTest, Nfds) {
+ // Stash value of RLIMIT_NOFILES.
+ struct rlimit rlim;
+ TEST_PCHECK(getrlimit(RLIMIT_NOFILE, &rlim) == 0);
+ rlim_t max_fds = rlim.rlim_cur;
+
+ // Create the biggest possible pollfd array such that each element is valid.
+ //
+ // Each entry in the 'fds' array refers to stdout (fd=1) and polls for
+ // "writable" events (events=POLLOUT). This essentially guarantees that the
+ // poll() is a no-op and allows negative testing of the 'nfds' parameter.
+ std::vector<struct pollfd> fds(max_fds, {.fd = 1, .events = POLLOUT});
+
+ // Verify that 'nfds' up to RLIMIT_NOFILE are allowed.
+ EXPECT_THAT(RetryEINTR(poll)(fds.data(), 1, 1), SyscallSucceedsWithValue(1));
+ EXPECT_THAT(RetryEINTR(poll)(fds.data(), max_fds / 2, 1),
+ SyscallSucceedsWithValue(max_fds / 2));
+ EXPECT_THAT(RetryEINTR(poll)(fds.data(), max_fds, 1),
+ SyscallSucceedsWithValue(max_fds));
+
+ // If 'nfds' exceeds RLIMIT_NOFILE then it must fail with EINVAL.
+ EXPECT_THAT(poll(fds.data(), max_fds + 1, 1), SyscallFailsWithErrno(EINVAL));
+}
+
+} // namespace
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/ppoll.cc b/test/syscalls/linux/ppoll.cc
new file mode 100644
index 000000000..f8c388c00
--- /dev/null
+++ b/test/syscalls/linux/ppoll.cc
@@ -0,0 +1,155 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <poll.h>
+#include <signal.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "absl/time/time.h"
+#include "test/syscalls/linux/base_poll_test.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+// Linux and glibc have a different idea of the sizeof sigset_t. When calling
+// the syscall directly, use what the kernel expects.
+unsigned kSigsetSize = SIGRTMAX / 8;
+
+// Linux ppoll(2) differs from the glibc wrapper function in that Linux updates
+// the timeout with the amount of time remaining. In order to test this behavior
+// we need to use the syscall directly.
+int syscallPpoll(struct pollfd* fds, nfds_t nfds, struct timespec* timeout_ts,
+ const sigset_t* sigmask, unsigned mask_size) {
+ return syscall(SYS_ppoll, fds, nfds, timeout_ts, sigmask, mask_size);
+}
+
+class PpollTest : public BasePollTest {
+ protected:
+ void SetUp() override { BasePollTest::SetUp(); }
+ void TearDown() override { BasePollTest::TearDown(); }
+};
+
+TEST_F(PpollTest, InvalidFds) {
+ // fds is invalid because it's null, but we tell ppoll the length is non-zero.
+ struct timespec timeout = {};
+ sigset_t sigmask;
+ TEST_PCHECK(sigemptyset(&sigmask) == 0);
+ EXPECT_THAT(syscallPpoll(nullptr, 1, &timeout, &sigmask, kSigsetSize),
+ SyscallFailsWithErrno(EFAULT));
+ EXPECT_THAT(syscallPpoll(nullptr, -1, &timeout, &sigmask, kSigsetSize),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+// See that when fds is null, ppoll behaves like sleep.
+TEST_F(PpollTest, NullFds) {
+ struct timespec timeout = absl::ToTimespec(absl::Milliseconds(10));
+ ASSERT_THAT(syscallPpoll(nullptr, 0, &timeout, nullptr, 0),
+ SyscallSucceeds());
+ EXPECT_EQ(timeout.tv_sec, 0);
+ EXPECT_EQ(timeout.tv_nsec, 0);
+}
+
+TEST_F(PpollTest, ZeroTimeout) {
+ struct timespec timeout = {};
+ ASSERT_THAT(syscallPpoll(nullptr, 0, &timeout, nullptr, 0),
+ SyscallSucceeds());
+ EXPECT_EQ(timeout.tv_sec, 0);
+ EXPECT_EQ(timeout.tv_nsec, 0);
+}
+
+// If random S/R interrupts the ppoll, SIGALRM may be delivered before ppoll
+// restarts, causing the ppoll to hang forever.
+TEST_F(PpollTest, NoTimeout_NoRandomSave) {
+ // When there's no timeout, ppoll may never return so set a timer.
+ SetTimer(absl::Milliseconds(100));
+ // See that we get interrupted by the timer.
+ ASSERT_THAT(syscallPpoll(nullptr, 0, nullptr, nullptr, 0),
+ SyscallFailsWithErrno(EINTR));
+ EXPECT_TRUE(TimerFired());
+}
+
+TEST_F(PpollTest, InvalidTimeoutNegative) {
+ struct timespec timeout = absl::ToTimespec(absl::Nanoseconds(-1));
+ EXPECT_THAT(syscallPpoll(nullptr, 0, &timeout, nullptr, 0),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_F(PpollTest, InvalidTimeoutNotNormalized) {
+ struct timespec timeout = {0, 1000000001};
+ EXPECT_THAT(syscallPpoll(nullptr, 0, &timeout, nullptr, 0),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_F(PpollTest, InvalidMaskSize) {
+ struct timespec timeout = {};
+ sigset_t sigmask;
+ TEST_PCHECK(sigemptyset(&sigmask) == 0);
+ EXPECT_THAT(syscallPpoll(nullptr, 0, &timeout, &sigmask, 128),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+// Verify that signals blocked by the ppoll mask (that would otherwise be
+// allowed) do not interrupt ppoll.
+TEST_F(PpollTest, SignalMaskBlocksSignal) {
+ absl::Duration duration(absl::Seconds(30));
+ struct timespec timeout = absl::ToTimespec(duration);
+ absl::Duration timer_duration(absl::Seconds(10));
+
+ // Call with a mask that blocks SIGALRM. See that ppoll is not interrupted
+ // (i.e. returns 0) and that upon completion, the timer has fired.
+ sigset_t mask;
+ ASSERT_THAT(sigprocmask(0, nullptr, &mask), SyscallSucceeds());
+ TEST_PCHECK(sigaddset(&mask, SIGALRM) == 0);
+ SetTimer(timer_duration);
+ MaybeSave();
+ ASSERT_FALSE(TimerFired());
+ ASSERT_THAT(syscallPpoll(nullptr, 0, &timeout, &mask, kSigsetSize),
+ SyscallSucceeds());
+ EXPECT_TRUE(TimerFired());
+ EXPECT_EQ(absl::DurationFromTimespec(timeout), absl::Duration());
+}
+
+// Verify that signals allowed by the ppoll mask (that would otherwise be
+// blocked) interrupt ppoll.
+TEST_F(PpollTest, SignalMaskAllowsSignal) {
+ absl::Duration duration(absl::Seconds(30));
+ struct timespec timeout = absl::ToTimespec(duration);
+ absl::Duration timer_duration(absl::Seconds(10));
+
+ sigset_t mask;
+ ASSERT_THAT(sigprocmask(0, nullptr, &mask), SyscallSucceeds());
+
+ // Block SIGALRM.
+ auto cleanup =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, SIGALRM));
+
+ // Call with a mask that unblocks SIGALRM. See that ppoll is interrupted.
+ SetTimer(timer_duration);
+ MaybeSave();
+ ASSERT_FALSE(TimerFired());
+ ASSERT_THAT(syscallPpoll(nullptr, 0, &timeout, &mask, kSigsetSize),
+ SyscallFailsWithErrno(EINTR));
+ EXPECT_TRUE(TimerFired());
+ EXPECT_GT(absl::DurationFromTimespec(timeout), absl::Duration());
+}
+
+} // namespace
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/prctl.cc b/test/syscalls/linux/prctl.cc
new file mode 100644
index 000000000..44f3df6a3
--- /dev/null
+++ b/test/syscalls/linux/prctl.cc
@@ -0,0 +1,171 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/prctl.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <string>
+
+#include "gtest/gtest.h"
+#include "test/util/capability_util.h"
+#include "test/util/multiprocess_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+DEFINE_bool(prctl_no_new_privs_test_child, false,
+ "If true, exit with the return value of prctl(PR_GET_NO_NEW_PRIVS) "
+ "plus an offset (see test source).");
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(PrctlTest, NameInitialized) {
+ const size_t name_length = 20;
+ char name[name_length] = {};
+ ASSERT_THAT(prctl(PR_GET_NAME, name), SyscallSucceeds());
+ ASSERT_NE(std::string(name), "");
+}
+
+TEST(PrctlTest, SetNameLongName) {
+ const size_t name_length = 20;
+ const std::string long_name(name_length, 'A');
+ ASSERT_THAT(prctl(PR_SET_NAME, long_name.c_str()), SyscallSucceeds());
+ char truncated_name[name_length] = {};
+ ASSERT_THAT(prctl(PR_GET_NAME, truncated_name), SyscallSucceeds());
+ const size_t truncated_length = 15;
+ ASSERT_EQ(long_name.substr(0, truncated_length), std::string(truncated_name));
+}
+
+// Offset added to exit code from test child to distinguish from other abnormal
+// exits.
+constexpr int kPrctlNoNewPrivsTestChildExitBase = 100;
+
+TEST(PrctlTest, NoNewPrivsPreservedAcrossCloneForkAndExecve) {
+ // Check if no_new_privs is already set. If it is, we can still test that it's
+ // preserved across clone/fork/execve, but we also expect it to still be set
+ // at the end of the test. Otherwise, call prctl(PR_SET_NO_NEW_PRIVS) so as
+ // not to contaminate the original thread.
+ int no_new_privs;
+ ASSERT_THAT(no_new_privs = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0),
+ SyscallSucceeds());
+ ScopedThread([] {
+ ASSERT_THAT(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0), SyscallSucceeds());
+ EXPECT_THAT(prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0),
+ SyscallSucceedsWithValue(1));
+ ScopedThread([] {
+ EXPECT_THAT(prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0),
+ SyscallSucceedsWithValue(1));
+ // Note that these ASSERT_*s failing will only return from this thread,
+ // but this is the intended behavior.
+ pid_t child_pid = -1;
+ int execve_errno = 0;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+ ForkAndExec("/proc/self/exe",
+ {"/proc/self/exe", "--prctl_no_new_privs_test_child"}, {},
+ nullptr, &child_pid, &execve_errno));
+
+ ASSERT_GT(child_pid, 0);
+ ASSERT_EQ(execve_errno, 0);
+
+ int status = 0;
+ ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0),
+ SyscallSucceeds());
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), kPrctlNoNewPrivsTestChildExitBase + 1);
+
+ EXPECT_THAT(prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0),
+ SyscallSucceedsWithValue(1));
+ });
+ EXPECT_THAT(prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0),
+ SyscallSucceedsWithValue(1));
+ });
+ EXPECT_THAT(prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0),
+ SyscallSucceedsWithValue(no_new_privs));
+}
+
+TEST(PrctlTest, PDeathSig) {
+ pid_t child_pid;
+
+ // Make the new process' parent a separate thread since the parent death
+ // signal fires when the parent *thread* exits.
+ ScopedThread([&] {
+ child_pid = fork();
+ TEST_CHECK(child_pid >= 0);
+ if (child_pid == 0) {
+ // In child process.
+ TEST_CHECK(prctl(PR_SET_PDEATHSIG, SIGKILL) >= 0);
+ int signo;
+ TEST_CHECK(prctl(PR_GET_PDEATHSIG, &signo) >= 0);
+ TEST_CHECK(signo == SIGKILL);
+ // Enable tracing, then raise SIGSTOP and expect our parent to suppress
+ // it.
+ TEST_CHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) >= 0);
+ raise(SIGSTOP);
+ // Sleep until killed by our parent death signal. sleep(3) is
+ // async-signal-safe, absl::SleepFor isn't.
+ while (true) {
+ sleep(10);
+ }
+ }
+ // In parent process.
+
+ // Wait for the child to send itself SIGSTOP and enter signal-delivery-stop.
+ int status;
+ ASSERT_THAT(waitpid(child_pid, &status, 0),
+ SyscallSucceedsWithValue(child_pid));
+ EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP)
+ << "status = " << status;
+
+ // Suppress the SIGSTOP and detach from the child.
+ ASSERT_THAT(ptrace(PTRACE_DETACH, child_pid, 0, 0), SyscallSucceeds());
+ });
+
+ // The child should have been killed by its parent death SIGKILL.
+ int status;
+ ASSERT_THAT(waitpid(child_pid, &status, 0),
+ SyscallSucceedsWithValue(child_pid));
+ EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
+ << "status = " << status;
+}
+
+// This test is to validate that calling prctl with PR_SET_MM without the
+// CAP_SYS_RESOURCE returns EPERM.
+TEST(PrctlTest, InvalidPrSetMM) {
+ if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_RESOURCE))) {
+ ASSERT_NO_ERRNO(SetCapability(CAP_SYS_RESOURCE,
+ false)); // Drop capability to test below.
+ }
+ ASSERT_THAT(prctl(PR_SET_MM, 0, 0, 0, 0), SyscallFailsWithErrno(EPERM));
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
+
+int main(int argc, char** argv) {
+ gvisor::testing::TestInit(&argc, &argv);
+
+ if (FLAGS_prctl_no_new_privs_test_child) {
+ exit(gvisor::testing::kPrctlNoNewPrivsTestChildExitBase +
+ prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0));
+ }
+
+ return RUN_ALL_TESTS();
+}
diff --git a/test/syscalls/linux/prctl_setuid.cc b/test/syscalls/linux/prctl_setuid.cc
new file mode 100644
index 000000000..c1b561464
--- /dev/null
+++ b/test/syscalls/linux/prctl_setuid.cc
@@ -0,0 +1,262 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sched.h>
+#include <sys/prctl.h>
+#include <string>
+
+#include "gtest/gtest.h"
+#include "test/util/capability_util.h"
+#include "test/util/logging.h"
+#include "test/util/multiprocess_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+DEFINE_int32(scratch_uid, 65534, "scratch UID");
+// This flag is used to verify that after an exec PR_GET_KEEPCAPS
+// returns 0, the return code will be offset by kPrGetKeepCapsExitBase.
+DEFINE_bool(prctl_pr_get_keepcaps, false,
+ "If true the test will verify that prctl with pr_get_keepcaps"
+ "returns 0. The test will exit with the result of that check.");
+
+// These tests exist seperately from prctl because we need to start
+// them as root. Setuid() has the behavior that permissions are fully
+// removed if one of the UIDs were 0 before a setuid() call. This
+// behavior can be changed by using PR_SET_KEEPCAPS and that is what
+// is tested here.
+//
+// Reference setuid(2):
+// The setuid() function checks the effective user ID of
+// the caller and if it is the superuser, all process-related user ID's
+// are set to uid. After this has occurred, it is impossible for the
+// program to regain root privileges.
+//
+// Thus, a set-user-ID-root program wishing to temporarily drop root
+// privileges, assume the identity of an unprivileged user, and then
+// regain root privileges afterward cannot use setuid(). You can
+// accomplish this with seteuid(2).
+namespace gvisor {
+namespace testing {
+
+// Offset added to exit code from test child to distinguish from other abnormal
+// exits.
+constexpr int kPrGetKeepCapsExitBase = 100;
+
+namespace {
+
+class PrctlKeepCapsSetuidTest : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ // PR_GET_KEEPCAPS will only return 0 or 1 (on success).
+ ASSERT_THAT(original_keepcaps_ = prctl(PR_GET_KEEPCAPS, 0, 0, 0, 0),
+ SyscallSucceeds());
+ ASSERT_TRUE(original_keepcaps_ == 0 || original_keepcaps_ == 1);
+ }
+
+ void TearDown() override {
+ // Restore PR_SET_KEEPCAPS.
+ ASSERT_THAT(prctl(PR_SET_KEEPCAPS, original_keepcaps_, 0, 0, 0),
+ SyscallSucceeds());
+
+ // Verify that it was restored.
+ ASSERT_THAT(prctl(PR_GET_KEEPCAPS, 0, 0, 0, 0),
+ SyscallSucceedsWithValue(original_keepcaps_));
+ }
+
+ // The original keep caps value exposed so tests can use it if they need.
+ int original_keepcaps_ = 0;
+};
+
+// This test will verify that a bad value, eg. not 0 or 1 for
+// PR_SET_KEEPCAPS will return EINVAL as required by prctl(2).
+TEST_F(PrctlKeepCapsSetuidTest, PrctlBadArgsToKeepCaps) {
+ ASSERT_THAT(prctl(PR_SET_KEEPCAPS, 2, 0, 0, 0),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+// This test will verify that a setuid(2) without PR_SET_KEEPCAPS will cause
+// all capabilities to be dropped.
+TEST_F(PrctlKeepCapsSetuidTest, SetUidNoKeepCaps) {
+ // getuid(2) never fails.
+ if (getuid() != 0) {
+ SKIP_IF(!IsRunningOnGvisor());
+ FAIL() << "User is not root on gvisor platform.";
+ }
+
+ // Do setuid in a separate thread so that after finishing this test, the
+ // process can still open files the test harness created before starting
+ // this test. Otherwise, the files are created by root (UID before the
+ // test), but cannot be opened by the `uid` set below after the test. After
+ // calling setuid(non-zero-UID), there is no way to get root privileges
+ // back.
+ ScopedThread([] {
+ // Start by verifying we have a capability.
+ TEST_CHECK(HaveCapability(CAP_SYS_ADMIN).ValueOrDie());
+
+ // Verify that PR_GET_KEEPCAPS is disabled.
+ ASSERT_THAT(prctl(PR_GET_KEEPCAPS, 0, 0, 0, 0),
+ SyscallSucceedsWithValue(0));
+
+ // Use syscall instead of glibc setuid wrapper because we want this setuid
+ // call to only apply to this task. POSIX threads, however, require that
+ // all threads have the same UIDs, so using the setuid wrapper sets all
+ // threads' real UID.
+ EXPECT_THAT(syscall(SYS_setuid, FLAGS_scratch_uid), SyscallSucceeds());
+
+ // Verify that we changed uid.
+ EXPECT_THAT(getuid(), SyscallSucceedsWithValue(FLAGS_scratch_uid));
+
+ // Verify we lost the capability in the effective set, this always happens.
+ TEST_CHECK(!HaveCapability(CAP_SYS_ADMIN).ValueOrDie());
+
+ // We should have also lost it in the permitted set by the setuid() so
+ // SetCapability should fail when we try to add it back to the effective set
+ ASSERT_FALSE(SetCapability(CAP_SYS_ADMIN, true).ok());
+ });
+}
+
+// This test will verify that a setuid with PR_SET_KEEPCAPS will cause
+// capabilities to be retained after we switch away from the root user.
+TEST_F(PrctlKeepCapsSetuidTest, SetUidKeepCaps) {
+ // getuid(2) never fails.
+ if (getuid() != 0) {
+ SKIP_IF(!IsRunningOnGvisor());
+ FAIL() << "User is not root on gvisor platform.";
+ }
+
+ // Do setuid in a separate thread so that after finishing this test, the
+ // process can still open files the test harness created before starting
+ // this test. Otherwise, the files are created by root (UID before the
+ // test), but cannot be opened by the `uid` set below after the test. After
+ // calling setuid(non-zero-UID), there is no way to get root privileges
+ // back.
+ ScopedThread([] {
+ // Start by verifying we have a capability.
+ TEST_CHECK(HaveCapability(CAP_SYS_ADMIN).ValueOrDie());
+
+ // Set PR_SET_KEEPCAPS.
+ ASSERT_THAT(prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0), SyscallSucceeds());
+
+ // Verify PR_SET_KEEPCAPS was set before we proceed.
+ ASSERT_THAT(prctl(PR_GET_KEEPCAPS, 0, 0, 0, 0),
+ SyscallSucceedsWithValue(1));
+
+ // Use syscall instead of glibc setuid wrapper because we want this setuid
+ // call to only apply to this task. POSIX threads, however, require that
+ // all threads have the same UIDs, so using the setuid wrapper sets all
+ // threads' real UID.
+ EXPECT_THAT(syscall(SYS_setuid, FLAGS_scratch_uid), SyscallSucceeds());
+
+ // Verify that we changed uid.
+ EXPECT_THAT(getuid(), SyscallSucceedsWithValue(FLAGS_scratch_uid));
+
+ // Verify we lost the capability in the effective set, this always happens.
+ TEST_CHECK(!HaveCapability(CAP_SYS_ADMIN).ValueOrDie());
+
+ // We lost the capability in the effective set, but it will still
+ // exist in the permitted set so we can elevate the capability.
+ ASSERT_NO_ERRNO(SetCapability(CAP_SYS_ADMIN, true));
+
+ // Verify we got back the capability in the effective set.
+ TEST_CHECK(HaveCapability(CAP_SYS_ADMIN).ValueOrDie());
+ });
+}
+
+// This test will verify that PR_SET_KEEPCAPS is not retained
+// across an execve. According to prctl(2):
+// "The "keep capabilities" value will be reset to 0 on subsequent
+// calls to execve(2)."
+TEST_F(PrctlKeepCapsSetuidTest, NoKeepCapsAfterExec) {
+ ASSERT_THAT(prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0), SyscallSucceeds());
+
+ // Verify PR_SET_KEEPCAPS was set before we proceed.
+ ASSERT_THAT(prctl(PR_GET_KEEPCAPS, 0, 0, 0, 0), SyscallSucceedsWithValue(1));
+
+ pid_t child_pid = -1;
+ int execve_errno = 0;
+ // Do an exec and then verify that PR_GET_KEEPCAPS returns 0
+ // see the body of main below.
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(ForkAndExec(
+ "/proc/self/exe", {"/proc/self/exe", "--prctl_pr_get_keepcaps"}, {},
+ nullptr, &child_pid, &execve_errno));
+
+ ASSERT_GT(child_pid, 0);
+ ASSERT_EQ(execve_errno, 0);
+
+ int status = 0;
+ ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+ ASSERT_TRUE(WIFEXITED(status));
+ // PR_SET_KEEPCAPS should have been cleared by the exec.
+ // Success should return gvisor::testing::kPrGetKeepCapsExitBase + 0
+ ASSERT_EQ(WEXITSTATUS(status), kPrGetKeepCapsExitBase);
+}
+
+TEST_F(PrctlKeepCapsSetuidTest, NoKeepCapsAfterNewUserNamespace) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace()));
+
+ // Fork to avoid changing the user namespace of the original test process.
+ pid_t const child_pid = fork();
+
+ if (child_pid == 0) {
+ // Verify that the keepcaps flag is set to 0 when we change user namespaces.
+ TEST_PCHECK(prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0) == 0);
+ MaybeSave();
+
+ TEST_PCHECK(prctl(PR_GET_KEEPCAPS, 0, 0, 0, 0) == 1);
+ MaybeSave();
+
+ TEST_PCHECK(unshare(CLONE_NEWUSER) == 0);
+ MaybeSave();
+
+ TEST_PCHECK(prctl(PR_GET_KEEPCAPS, 0, 0, 0, 0) == 0);
+ MaybeSave();
+
+ _exit(0);
+ }
+
+ int status;
+ ASSERT_THAT(child_pid, SyscallSucceeds());
+ ASSERT_THAT(waitpid(child_pid, &status, 0),
+ SyscallSucceedsWithValue(child_pid));
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << "status = " << status;
+}
+
+// This test will verify that PR_SET_KEEPCAPS and PR_GET_KEEPCAPS work correctly
+TEST_F(PrctlKeepCapsSetuidTest, PrGetKeepCaps) {
+ // Set PR_SET_KEEPCAPS to the negation of the original.
+ ASSERT_THAT(prctl(PR_SET_KEEPCAPS, !original_keepcaps_, 0, 0, 0),
+ SyscallSucceeds());
+
+ // Verify it was set.
+ ASSERT_THAT(prctl(PR_GET_KEEPCAPS, 0, 0, 0, 0),
+ SyscallSucceedsWithValue(!original_keepcaps_));
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
+
+int main(int argc, char** argv) {
+ gvisor::testing::TestInit(&argc, &argv);
+
+ if (FLAGS_prctl_pr_get_keepcaps) {
+ return gvisor::testing::kPrGetKeepCapsExitBase +
+ prctl(PR_GET_KEEPCAPS, 0, 0, 0, 0);
+ }
+
+ return RUN_ALL_TESTS();
+}
diff --git a/test/syscalls/linux/pread64.cc b/test/syscalls/linux/pread64.cc
new file mode 100644
index 000000000..4e5bcfcde
--- /dev/null
+++ b/test/syscalls/linux/pread64.cc
@@ -0,0 +1,152 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+class Pread64Test : public ::testing::Test {
+ void SetUp() override {
+ name_ = NewTempAbsPath();
+ ASSERT_NO_ERRNO_AND_VALUE(Open(name_, O_CREAT, 0644));
+ }
+
+ void TearDown() override { unlink(name_.c_str()); }
+
+ public:
+ std::string name_;
+};
+
+TEST(Pread64TestNoTempFile, BadFileDescriptor) {
+ char buf[1024];
+ EXPECT_THAT(pread64(-1, buf, 1024, 0), SyscallFailsWithErrno(EBADF));
+}
+
+TEST_F(Pread64Test, ZeroBuffer) {
+ const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(name_, O_RDWR));
+
+ char msg[] = "hello world";
+ EXPECT_THAT(pwrite64(fd.get(), msg, strlen(msg), 0),
+ SyscallSucceedsWithValue(strlen(msg)));
+
+ char buf[10];
+ EXPECT_THAT(pread64(fd.get(), buf, 0, 0), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(Pread64Test, BadBuffer) {
+ const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(name_, O_RDWR));
+
+ char msg[] = "hello world";
+ EXPECT_THAT(pwrite64(fd.get(), msg, strlen(msg), 0),
+ SyscallSucceedsWithValue(strlen(msg)));
+
+ char* bad_buffer = nullptr;
+ EXPECT_THAT(pread64(fd.get(), bad_buffer, 1024, 0),
+ SyscallFailsWithErrno(EFAULT));
+}
+
+TEST_F(Pread64Test, WriteOnlyNotReadable) {
+ const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(name_, O_WRONLY));
+
+ char buf[1024];
+ EXPECT_THAT(pread64(fd.get(), buf, 1024, 0), SyscallFailsWithErrno(EBADF));
+}
+
+TEST_F(Pread64Test, DirNotReadable) {
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(GetAbsoluteTestTmpdir(), O_RDONLY));
+
+ char buf[1024];
+ EXPECT_THAT(pread64(fd.get(), buf, 1024, 0), SyscallFailsWithErrno(EISDIR));
+}
+
+TEST_F(Pread64Test, BadOffset) {
+ const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(name_, O_RDONLY));
+
+ char buf[1024];
+ EXPECT_THAT(pread64(fd.get(), buf, 1024, -1), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_F(Pread64Test, OffsetNotIncremented) {
+ const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(name_, O_RDWR));
+
+ char msg[] = "hello world";
+ EXPECT_THAT(write(fd.get(), msg, strlen(msg)),
+ SyscallSucceedsWithValue(strlen(msg)));
+ int offset;
+ EXPECT_THAT(offset = lseek(fd.get(), 0, SEEK_CUR), SyscallSucceeds());
+
+ char buf1[1024];
+ EXPECT_THAT(pread64(fd.get(), buf1, 1024, 0),
+ SyscallSucceedsWithValue(strlen(msg)));
+ EXPECT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(offset));
+
+ char buf2[1024];
+ EXPECT_THAT(pread64(fd.get(), buf2, 1024, 3),
+ SyscallSucceedsWithValue(strlen(msg) - 3));
+ EXPECT_THAT(lseek(fd.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(offset));
+}
+
+TEST_F(Pread64Test, EndOfFile) {
+ const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(name_, O_RDONLY));
+
+ char buf[1024];
+ EXPECT_THAT(pread64(fd.get(), buf, 1024, 0), SyscallSucceedsWithValue(0));
+}
+
+TEST(Pread64TestNoTempFile, CantReadSocketPair_NoRandomSave) {
+ int sock_fds[2];
+ EXPECT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds), SyscallSucceeds());
+
+ char buf[1024];
+ EXPECT_THAT(pread64(sock_fds[0], buf, 1024, 0),
+ SyscallFailsWithErrno(ESPIPE));
+ EXPECT_THAT(pread64(sock_fds[1], buf, 1024, 0),
+ SyscallFailsWithErrno(ESPIPE));
+
+ EXPECT_THAT(close(sock_fds[0]), SyscallSucceeds());
+ EXPECT_THAT(close(sock_fds[1]), SyscallSucceeds());
+}
+
+TEST(Pread64TestNoTempFile, CantReadPipe) {
+ char buf[1024];
+
+ int pipe_fds[2];
+ EXPECT_THAT(pipe(pipe_fds), SyscallSucceeds());
+
+ EXPECT_THAT(pread64(pipe_fds[0], buf, 1024, 0),
+ SyscallFailsWithErrno(ESPIPE));
+
+ EXPECT_THAT(close(pipe_fds[0]), SyscallSucceeds());
+ EXPECT_THAT(close(pipe_fds[1]), SyscallSucceeds());
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/preadv.cc b/test/syscalls/linux/preadv.cc
new file mode 100644
index 000000000..8d3aed43c
--- /dev/null
+++ b/test/syscalls/linux/preadv.cc
@@ -0,0 +1,94 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include <atomic>
+#include <string>
+
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/logging.h"
+#include "test/util/memory_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+#include "test/util/timer_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+TEST(PreadvTest, MMConcurrencyStress) {
+ // Fill a one-page file with zeroes (the contents don't really matter).
+ const auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ /* parent = */ GetAbsoluteTestTmpdir(),
+ /* content = */ std::string(kPageSize, 0), TempPath::kDefaultFileMode));
+ const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDONLY));
+
+ // Get a one-page private mapping to read to.
+ const Mapping m = ASSERT_NO_ERRNO_AND_VALUE(
+ MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
+
+ // Repeatedly fork in a separate thread to force the mapping to become
+ // copy-on-write.
+ std::atomic<bool> done(false);
+ const ScopedThread t([&] {
+ while (!done.load()) {
+ const pid_t pid = fork();
+ TEST_CHECK(pid >= 0);
+ if (pid == 0) {
+ // In child. The parent was obviously multithreaded, so it's neither
+ // safe nor necessary to do much more than exit.
+ syscall(SYS_exit_group, 0);
+ }
+ int status;
+ ASSERT_THAT(RetryEINTR(waitpid)(pid, &status, 0),
+ SyscallSucceedsWithValue(pid));
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << "status = " << status;
+ }
+ });
+
+ // Repeatedly read to the mapping.
+ struct iovec iov[2];
+ iov[0].iov_base = m.ptr();
+ iov[0].iov_len = kPageSize / 2;
+ iov[1].iov_base = reinterpret_cast<void*>(m.addr() + kPageSize / 2);
+ iov[1].iov_len = kPageSize / 2;
+ constexpr absl::Duration kTestDuration = absl::Seconds(5);
+ const absl::Time end = absl::Now() + kTestDuration;
+ while (absl::Now() < end) {
+ // Among other causes, save/restore cycles may cause interruptions resulting
+ // in partial reads, so we don't expect any particular return value.
+ EXPECT_THAT(RetryEINTR(preadv)(fd.get(), iov, 2, 0), SyscallSucceeds());
+ }
+
+ // Stop the other thread.
+ done.store(true);
+
+ // The test passes if it neither deadlocks nor crashes the OS.
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/preadv2.cc b/test/syscalls/linux/preadv2.cc
new file mode 100644
index 000000000..642eed624
--- /dev/null
+++ b/test/syscalls/linux/preadv2.cc
@@ -0,0 +1,217 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include <atomic>
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/file_base.h"
+#include "test/syscalls/linux/readv_common.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/memory_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+#ifndef SYS_preadv2
+#if defined(__x86_64__)
+#define SYS_preadv2 327
+#else
+#error "Unknown architecture"
+#endif
+#endif // SYS_preadv2
+
+#ifndef RWF_HIPRI
+#define RWF_HIPRI 0x1
+#endif // RWF_HIPRI
+
+constexpr int kBufSize = 1024;
+
+std::string SetContent() {
+ std::string content;
+ for (int i = 0; i < kBufSize; i++) {
+ content += static_cast<char>((i % 10) + '0');
+ }
+ return content;
+}
+
+// This test is the base case where we call preadv (no offset, no flags).
+TEST(Preadv2Test, TestBaseCall) {
+ if (!IsRunningOnGvisor()) {
+ SKIP_BEFORE_KERNEL(/*major_version=*/4, /*minor_version=*/6);
+ }
+ std::string content = SetContent();
+
+ const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), content, TempPath::kDefaultFileMode));
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY));
+
+ std::vector<char> buf(kBufSize);
+ struct iovec iov;
+ iov.iov_base = buf.data();
+ iov.iov_len = buf.size();
+
+ EXPECT_THAT(syscall(SYS_preadv2, fd.get(), &iov, /*iov_cnt*/ 1,
+ /*offset=*/0, /*flags=*/0),
+ SyscallSucceedsWithValue(kBufSize));
+
+ EXPECT_EQ(content, std::string(buf.data(), buf.size()));
+}
+
+// This test is where we call preadv with an offset and no flags.
+TEST(Preadv2Test, TestValidPositiveOffset) {
+ if (!IsRunningOnGvisor()) {
+ SKIP_BEFORE_KERNEL(/*major_version=*/4, /*minor_version=*/6);
+ }
+ std::string content = SetContent();
+ const std::string prefix = "0";
+
+ const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), prefix + content, TempPath::kDefaultFileMode));
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY));
+
+ std::vector<char> buf(kBufSize, '0');
+ struct iovec iov;
+ iov.iov_base = buf.data();
+ iov.iov_len = buf.size();
+
+ EXPECT_THAT(syscall(SYS_preadv2, fd.get(), &iov, /*iov_cnt=*/1,
+ /*offset=*/prefix.size(), /*flags=*/0),
+ SyscallSucceedsWithValue(kBufSize));
+
+ EXPECT_EQ(content, std::string(buf.data(), buf.size()));
+}
+
+// This test is the base case where we call readv by using -1 as the offset. The
+// read should use the file offset, so the test increments it by one prior to
+// calling preadv2.
+TEST(Preadv2Test, TestNegativeOneOffset) {
+ if (!IsRunningOnGvisor()) {
+ SKIP_BEFORE_KERNEL(/*major_version=*/4, /*minor_version=*/6);
+ }
+ std::string content = SetContent();
+ const std::string prefix = "231";
+
+ const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), prefix + content, TempPath::kDefaultFileMode));
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY));
+ ASSERT_THAT(lseek(fd.get(), prefix.size(), SEEK_SET),
+ SyscallSucceedsWithValue(prefix.size()));
+
+ std::vector<char> buf(kBufSize, '0');
+ struct iovec iov;
+ iov.iov_base = buf.data();
+ iov.iov_len = buf.size();
+
+ EXPECT_THAT(syscall(SYS_preadv2, fd.get(), &iov, /*iov_cnt=*/1,
+ /*offset=*/static_cast<off_t>(-1), /*flags=*/0),
+ SyscallSucceedsWithValue(kBufSize));
+
+ EXPECT_EQ(content, std::string(buf.data(), buf.size()));
+}
+
+// This test calls preadv2 with an invalid flag.
+TEST(Preadv2Test, TestInvalidFlag) {
+ if (!IsRunningOnGvisor()) {
+ SKIP_BEFORE_KERNEL(/*major_version=*/4, /*minor_version=*/6);
+ }
+
+ const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), "", TempPath::kDefaultFileMode));
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY | O_DIRECT));
+
+ struct iovec iov;
+
+ EXPECT_THAT(syscall(SYS_preadv2, fd.get(), &iov, /*iov_cnt=*/1,
+ /*offset=*/0, /*flags=*/RWF_HIPRI << 1),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+// This test calls preadv2 with an invalid offset.
+TEST(Preadv2Test, TestInvalidOffset) {
+ if (!IsRunningOnGvisor()) {
+ SKIP_BEFORE_KERNEL(/*major_version=*/4, /*minor_version=*/6);
+ }
+
+ const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), "", TempPath::kDefaultFileMode));
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY | O_DIRECT));
+ struct iovec iov;
+
+ EXPECT_THAT(syscall(SYS_preadv2, fd.get(), &iov, /*iov_cnt=*/1,
+ /*offset=*/static_cast<off_t>(-8), /*flags=*/RWF_HIPRI),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+// This test calls preadv with a file set O_WRONLY.
+TEST(Preadv2Test, TestUnreadableFile) {
+ if (!IsRunningOnGvisor()) {
+ SKIP_BEFORE_KERNEL(/*major_version=*/4, /*minor_version=*/6);
+ }
+
+ const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), "", TempPath::kDefaultFileMode));
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_WRONLY));
+ struct iovec iov;
+
+ EXPECT_THAT(syscall(SYS_preadv2, fd.get(), &iov, /*iov_cnt=*/1,
+ /*offset=*/0, /*flags=*/0),
+ SyscallFailsWithErrno(EBADF));
+}
+
+// Calling preadv2 with a non-negative offset calls preadv. Calling preadv with
+// an unseekable file is not allowed. A pipe is used for an unseekable file.
+TEST(Preadv2Test, TestUnseekableFile) {
+ if (!IsRunningOnGvisor()) {
+ SKIP_BEFORE_KERNEL(/*major_version=*/4, /*minor_version=*/6);
+ }
+
+ int pipe_fds[2];
+
+ ASSERT_THAT(pipe(pipe_fds), SyscallSucceeds());
+
+ struct iovec iov;
+
+ EXPECT_THAT(syscall(SYS_preadv2, pipe_fds[0], &iov, /*iov_cnt=*/1,
+ /*offset=*/2, /*flags=*/0),
+ SyscallFailsWithErrno(ESPIPE));
+
+ EXPECT_THAT(close(pipe_fds[0]), SyscallSucceeds());
+ EXPECT_THAT(close(pipe_fds[1]), SyscallSucceeds());
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/priority.cc b/test/syscalls/linux/priority.cc
new file mode 100644
index 000000000..69a58a422
--- /dev/null
+++ b/test/syscalls/linux/priority.cc
@@ -0,0 +1,215 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "absl/strings/numbers.h"
+#include "absl/strings/str_split.h"
+#include "test/util/capability_util.h"
+#include "test/util/fs_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// These tests are for both the getpriority(2) and setpriority(2) syscalls
+// These tests are very rudimentary because getpriority and setpriority
+// have not yet been fully implemented.
+
+// Getpriority does something
+TEST(GetpriorityTest, Implemented) {
+ // "getpriority() can legitimately return the value -1, it is necessary to
+ // clear the external variable errno prior to the call"
+ errno = 0;
+ EXPECT_THAT(getpriority(PRIO_PROCESS, /*who=*/0), SyscallSucceeds());
+}
+
+// Invalid which
+TEST(GetpriorityTest, InvalidWhich) {
+ errno = 0;
+ EXPECT_THAT(getpriority(/*which=*/3, /*who=*/0),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+// Process is found when which=PRIO_PROCESS
+TEST(GetpriorityTest, ValidWho) {
+ errno = 0;
+ EXPECT_THAT(getpriority(PRIO_PROCESS, getpid()), SyscallSucceeds());
+}
+
+// Process is not found when which=PRIO_PROCESS
+TEST(GetpriorityTest, InvalidWho) {
+ errno = 0;
+ // Flaky, but it's tough to avoid a race condition when finding an unused pid
+ EXPECT_THAT(getpriority(PRIO_PROCESS, /*who=*/INT_MAX - 1),
+ SyscallFailsWithErrno(ESRCH));
+}
+
+// Setpriority does something
+TEST(SetpriorityTest, Implemented) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_NICE)));
+
+ // No need to clear errno for setpriority():
+ // "The setpriority() call returns 0 if there is no error, or -1 if there is"
+ EXPECT_THAT(setpriority(PRIO_PROCESS, /*who=*/0, /*nice=*/16),
+ SyscallSucceeds());
+}
+
+// Invalid which
+TEST(Setpriority, InvalidWhich) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_NICE)));
+
+ EXPECT_THAT(setpriority(/*which=*/3, /*who=*/0, /*nice=*/16),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+// Process is found when which=PRIO_PROCESS
+TEST(SetpriorityTest, ValidWho) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_NICE)));
+
+ EXPECT_THAT(setpriority(PRIO_PROCESS, getpid(), /*nice=*/16),
+ SyscallSucceeds());
+}
+
+// niceval is within the range [-20, 19]
+TEST(SetpriorityTest, InsideRange) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_NICE)));
+
+ // Set 0 < niceval < 19
+ int nice = 12;
+ EXPECT_THAT(setpriority(PRIO_PROCESS, getpid(), nice), SyscallSucceeds());
+
+ errno = 0;
+ EXPECT_THAT(getpriority(PRIO_PROCESS, getpid()),
+ SyscallSucceedsWithValue(nice));
+
+ // Set -20 < niceval < 0
+ nice = -12;
+ EXPECT_THAT(setpriority(PRIO_PROCESS, getpid(), nice), SyscallSucceeds());
+
+ errno = 0;
+ EXPECT_THAT(getpriority(PRIO_PROCESS, getpid()),
+ SyscallSucceedsWithValue(nice));
+}
+
+// Verify that priority/niceness are exposed via /proc/PID/stat.
+TEST(SetpriorityTest, NicenessExposedViaProcfs) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_NICE)));
+
+ constexpr int kNiceVal = 12;
+ ASSERT_THAT(setpriority(PRIO_PROCESS, getpid(), kNiceVal), SyscallSucceeds());
+
+ errno = 0;
+ ASSERT_THAT(getpriority(PRIO_PROCESS, getpid()),
+ SyscallSucceedsWithValue(kNiceVal));
+
+ // Now verify we can read that same value via /proc/self/stat.
+ std::string proc_stat;
+ ASSERT_NO_ERRNO(GetContents("/proc/self/stat", &proc_stat));
+ std::vector<std::string> pieces = absl::StrSplit(proc_stat, ' ');
+ ASSERT_GT(pieces.size(), 20);
+
+ int niceness_procfs = 0;
+ ASSERT_TRUE(absl::SimpleAtoi(pieces[18], &niceness_procfs));
+ EXPECT_EQ(niceness_procfs, kNiceVal);
+}
+
+// In the kernel's implementation, values outside the range of [-20, 19] are
+// truncated to these minimum and maximum values. See
+// https://elixir.bootlin.com/linux/v4.4/source/kernel/sys.c#L190
+TEST(SetpriorityTest, OutsideRange) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_NICE)));
+
+ // Set niceval > 19
+ EXPECT_THAT(setpriority(PRIO_PROCESS, getpid(), /*nice=*/100),
+ SyscallSucceeds());
+
+ errno = 0;
+ // Test niceval truncated to 19
+ EXPECT_THAT(getpriority(PRIO_PROCESS, getpid()),
+ SyscallSucceedsWithValue(/*maxnice=*/19));
+
+ // Set niceval < -20
+ EXPECT_THAT(setpriority(PRIO_PROCESS, getpid(), /*nice=*/-100),
+ SyscallSucceeds());
+
+ errno = 0;
+ // Test niceval truncated to -20
+ EXPECT_THAT(getpriority(PRIO_PROCESS, getpid()),
+ SyscallSucceedsWithValue(/*minnice=*/-20));
+}
+
+// Process is not found when which=PRIO_PROCESS
+TEST(SetpriorityTest, InvalidWho) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_NICE)));
+
+ // Flaky, but it's tough to avoid a race condition when finding an unused pid
+ EXPECT_THAT(setpriority(PRIO_PROCESS,
+ /*who=*/INT_MAX - 1,
+ /*nice=*/16),
+ SyscallFailsWithErrno(ESRCH));
+}
+
+// Nice succeeds, correctly modifies (or in this case does not
+// modify priority of process
+TEST(SetpriorityTest, NiceSucceeds) {
+ errno = 0;
+ const int priority_before = getpriority(PRIO_PROCESS, /*who=*/0);
+ ASSERT_THAT(nice(/*inc=*/0), SyscallSucceeds());
+
+ // nice(0) should not change priority
+ EXPECT_EQ(priority_before, getpriority(PRIO_PROCESS, /*who=*/0));
+}
+
+// Threads resulting from clone() maintain parent's priority
+// Changes to child priority do not affect parent's priority
+TEST(GetpriorityTest, CloneMaintainsPriority) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_NICE)));
+
+ constexpr int kParentPriority = 16;
+ constexpr int kChildPriority = 14;
+ ASSERT_THAT(setpriority(PRIO_PROCESS, getpid(), kParentPriority),
+ SyscallSucceeds());
+
+ ScopedThread([kParentPriority, kChildPriority]() {
+ // Check that priority equals that of parent thread
+ pid_t my_tid;
+ EXPECT_THAT(my_tid = syscall(__NR_gettid), SyscallSucceeds());
+ EXPECT_THAT(getpriority(PRIO_PROCESS, my_tid),
+ SyscallSucceedsWithValue(kParentPriority));
+
+ // Change the child thread's priority
+ EXPECT_THAT(setpriority(PRIO_PROCESS, my_tid, kChildPriority),
+ SyscallSucceeds());
+ });
+
+ // Check that parent's priority reemained the same even though
+ // the child's priority was altered
+ EXPECT_EQ(kParentPriority, getpriority(PRIO_PROCESS, syscall(__NR_gettid)));
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/priority_execve.cc b/test/syscalls/linux/priority_execve.cc
new file mode 100644
index 000000000..5604bd3d0
--- /dev/null
+++ b/test/syscalls/linux/priority_execve.cc
@@ -0,0 +1,42 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+int main(int argc, char** argv, char** envp) {
+ errno = 0;
+ int prio = getpriority(PRIO_PROCESS, getpid());
+
+ // NOTE: getpriority() can legitimately return negative values
+ // in the range [-20, 0). If errno is set, exit with a value that
+ // could not be reached by a valid priority. Valid exit values
+ // for the test are in the range [1, 40], so we'll use 0.
+ if (errno != 0) {
+ printf("getpriority() failed with errno = %d\n", errno);
+ exit(0);
+ }
+
+ // Used by test to verify priority is being maintained through
+ // calls to execve(). Since prio should always be in the range
+ // [-20, 19], we offset by 20 so as not to have negative exit codes.
+ exit(20 - prio);
+
+ return 0;
+}
diff --git a/test/syscalls/linux/proc.cc b/test/syscalls/linux/proc.cc
new file mode 100644
index 000000000..e64df97b0
--- /dev/null
+++ b/test/syscalls/linux/proc.cc
@@ -0,0 +1,1830 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <elf.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sched.h>
+#include <signal.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <sys/stat.h>
+#include <sys/utsname.h>
+#include <syscall.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <atomic>
+#include <functional>
+#include <map>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/strings/ascii.h"
+#include "absl/strings/match.h"
+#include "absl/strings/numbers.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_split.h"
+#include "absl/strings/string_view.h"
+#include "absl/synchronization/mutex.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/capability_util.h"
+#include "test/util/cleanup.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/memory_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+#include "test/util/timer_util.h"
+
+// NOTE: No, this isn't really a syscall but this is a really simple
+// way to get it tested on both gVisor, PTrace and Linux.
+
+using ::testing::AllOf;
+using ::testing::ContainerEq;
+using ::testing::Contains;
+using ::testing::ContainsRegex;
+using ::testing::Gt;
+using ::testing::HasSubstr;
+using ::testing::IsSupersetOf;
+using ::testing::Pair;
+using ::testing::UnorderedElementsAre;
+using ::testing::UnorderedElementsAreArray;
+
+// Exported by glibc.
+extern char** environ;
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+// O_LARGEFILE as defined by Linux. glibc tries to be clever by setting it to 0
+// because "it isn't needed", even though Linux can return it via F_GETFL.
+constexpr int kOLargeFile = 00100000;
+
+// Takes the subprocess command line and pid.
+// If it returns !OK, WithSubprocess returns immediately.
+using SubprocessCallback = std::function<PosixError(int)>;
+
+std::vector<std::string> saved_argv; // NOLINT
+
+// Helper function to dump /proc/{pid}/status and check the
+// state data. State should = "Z" for zombied or "RSD" for
+// running, interruptible sleeping (S), or uninterruptible sleep
+// (D).
+void CompareProcessState(absl::string_view state, int pid) {
+ auto status_file = ASSERT_NO_ERRNO_AND_VALUE(
+ GetContents(absl::StrCat("/proc/", pid, "/status")));
+ EXPECT_THAT(status_file, ContainsRegex(absl::StrCat("State:.[", state,
+ "]\\s+\\(\\w+\\)")));
+}
+
+// Run callbacks while a subprocess is running, zombied, and/or exited.
+PosixError WithSubprocess(SubprocessCallback const& running,
+ SubprocessCallback const& zombied,
+ SubprocessCallback const& exited) {
+ int pipe_fds[2] = {};
+ if (pipe(pipe_fds) < 0) {
+ return PosixError(errno, "pipe");
+ }
+
+ int child_pid = fork();
+ if (child_pid < 0) {
+ return PosixError(errno, "fork");
+ }
+
+ if (child_pid == 0) {
+ close(pipe_fds[0]); // Close the read end.
+ const DisableSave ds; // Timing issues.
+
+ // Write to the pipe to tell it we're ready.
+ char buf = 'a';
+ int res = 0;
+ res = WriteFd(pipe_fds[1], &buf, sizeof(buf));
+ TEST_CHECK_MSG(res == sizeof(buf), "Write failure in subprocess");
+
+ while (true) {
+ SleepSafe(absl::Milliseconds(100));
+ }
+ __builtin_unreachable();
+ }
+
+ close(pipe_fds[1]); // Close the write end.
+
+ int status = 0;
+ auto wait_cleanup = Cleanup([child_pid, &status] {
+ EXPECT_THAT(waitpid(child_pid, &status, 0), SyscallSucceeds());
+ });
+ auto kill_cleanup = Cleanup([child_pid] {
+ EXPECT_THAT(kill(child_pid, SIGKILL), SyscallSucceeds());
+ });
+
+ // Wait for the child.
+ char buf = 0;
+ int res = ReadFd(pipe_fds[0], &buf, sizeof(buf));
+ if (res < 0) {
+ return PosixError(errno, "Read from pipe");
+ } else if (res == 0) {
+ return PosixError(EPIPE, "Unable to read from pipe: EOF");
+ }
+
+ if (running) {
+ // The first arg, RSD, refers to a "running process", or a process with a
+ // state of Running (R), Interruptable Sleep (S) or Uninterruptable
+ // Sleep (D).
+ CompareProcessState("RSD", child_pid);
+ RETURN_IF_ERRNO(running(child_pid));
+ }
+
+ // Kill the process.
+ kill_cleanup.Release()();
+ siginfo_t info;
+ // Wait until the child process has exited (WEXITED flag) but don't
+ // reap the child (WNOWAIT flag).
+ waitid(P_PID, child_pid, &info, WNOWAIT | WEXITED);
+
+ if (zombied) {
+ // Arg of "Z" refers to a Zombied Process.
+ CompareProcessState("Z", child_pid);
+ RETURN_IF_ERRNO(zombied(child_pid));
+ }
+
+ // Wait on the process.
+ wait_cleanup.Release()();
+ // If the process is reaped, then then this should return
+ // with ECHILD.
+ EXPECT_THAT(waitpid(child_pid, &status, WNOHANG),
+ SyscallFailsWithErrno(ECHILD));
+
+ if (exited) {
+ RETURN_IF_ERRNO(exited(child_pid));
+ }
+
+ return NoError();
+}
+
+// Access the file returned by name when a subprocess is running.
+PosixError AccessWhileRunning(std::function<std::string(int pid)> name, int flags,
+ std::function<void(int fd)> access) {
+ FileDescriptor fd;
+ return WithSubprocess(
+ [&](int pid) -> PosixError {
+ // Running.
+ ASSIGN_OR_RETURN_ERRNO(fd, Open(name(pid), flags));
+
+ access(fd.get());
+ return NoError();
+ },
+ nullptr, nullptr);
+}
+
+// Access the file returned by name when the a subprocess is zombied.
+PosixError AccessWhileZombied(std::function<std::string(int pid)> name, int flags,
+ std::function<void(int fd)> access) {
+ FileDescriptor fd;
+ return WithSubprocess(
+ [&](int pid) -> PosixError {
+ // Running.
+ ASSIGN_OR_RETURN_ERRNO(fd, Open(name(pid), flags));
+ return NoError();
+ },
+ [&](int pid) -> PosixError {
+ // Zombied.
+ access(fd.get());
+ return NoError();
+ },
+ nullptr);
+}
+
+// Access the file returned by name when the a subprocess is exited.
+PosixError AccessWhileExited(std::function<std::string(int pid)> name, int flags,
+ std::function<void(int fd)> access) {
+ FileDescriptor fd;
+ return WithSubprocess(
+ [&](int pid) -> PosixError {
+ // Running.
+ ASSIGN_OR_RETURN_ERRNO(fd, Open(name(pid), flags));
+ return NoError();
+ },
+ nullptr,
+ [&](int pid) -> PosixError {
+ // Exited.
+ access(fd.get());
+ return NoError();
+ });
+}
+
+// ReadFd(fd=/proc/PID/basename) while PID is running.
+int ReadWhileRunning(std::string const& basename, void* buf, size_t count) {
+ int ret = 0;
+ int err = 0;
+ EXPECT_NO_ERRNO(AccessWhileRunning(
+ [&](int pid) -> std::string {
+ return absl::StrCat("/proc/", pid, "/", basename);
+ },
+ O_RDONLY,
+ [&](int fd) {
+ ret = ReadFd(fd, buf, count);
+ err = errno;
+ }));
+ errno = err;
+ return ret;
+}
+
+// ReadFd(fd=/proc/PID/basename) while PID is zombied.
+int ReadWhileZombied(std::string const& basename, void* buf, size_t count) {
+ int ret = 0;
+ int err = 0;
+ EXPECT_NO_ERRNO(AccessWhileZombied(
+ [&](int pid) -> std::string {
+ return absl::StrCat("/proc/", pid, "/", basename);
+ },
+ O_RDONLY,
+ [&](int fd) {
+ ret = ReadFd(fd, buf, count);
+ err = errno;
+ }));
+ errno = err;
+ return ret;
+}
+
+// ReadFd(fd=/proc/PID/basename) while PID is exited.
+int ReadWhileExited(std::string const& basename, void* buf, size_t count) {
+ int ret = 0;
+ int err = 0;
+ EXPECT_NO_ERRNO(AccessWhileExited(
+ [&](int pid) -> std::string {
+ return absl::StrCat("/proc/", pid, "/", basename);
+ },
+ O_RDONLY,
+ [&](int fd) {
+ ret = ReadFd(fd, buf, count);
+ err = errno;
+ }));
+ errno = err;
+ return ret;
+}
+
+// readlinkat(fd=/proc/PID/, basename) while PID is running.
+int ReadlinkWhileRunning(std::string const& basename, char* buf, size_t count) {
+ int ret = 0;
+ int err = 0;
+ EXPECT_NO_ERRNO(AccessWhileRunning(
+ [&](int pid) -> std::string { return absl::StrCat("/proc/", pid, "/"); },
+ O_DIRECTORY,
+ [&](int fd) {
+ ret = readlinkat(fd, basename.c_str(), buf, count);
+ err = errno;
+ }));
+ errno = err;
+ return ret;
+}
+
+// readlinkat(fd=/proc/PID/, basename) while PID is zombied.
+int ReadlinkWhileZombied(std::string const& basename, char* buf, size_t count) {
+ int ret = 0;
+ int err = 0;
+ EXPECT_NO_ERRNO(AccessWhileZombied(
+ [&](int pid) -> std::string { return absl::StrCat("/proc/", pid, "/"); },
+ O_DIRECTORY,
+ [&](int fd) {
+ ret = readlinkat(fd, basename.c_str(), buf, count);
+ err = errno;
+ }));
+ errno = err;
+ return ret;
+}
+
+// readlinkat(fd=/proc/PID/, basename) while PID is exited.
+int ReadlinkWhileExited(std::string const& basename, char* buf, size_t count) {
+ int ret = 0;
+ int err = 0;
+ EXPECT_NO_ERRNO(AccessWhileExited(
+ [&](int pid) -> std::string { return absl::StrCat("/proc/", pid, "/"); },
+ O_DIRECTORY,
+ [&](int fd) {
+ ret = readlinkat(fd, basename.c_str(), buf, count);
+ err = errno;
+ }));
+ errno = err;
+ return ret;
+}
+
+TEST(ProcSelfTest, IsThreadGroupLeader) {
+ ScopedThread([] {
+ const pid_t tgid = getpid();
+ const pid_t tid = syscall(SYS_gettid);
+ EXPECT_NE(tgid, tid);
+ auto link = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self"));
+ EXPECT_EQ(link, absl::StrCat(tgid));
+ });
+}
+
+TEST(ProcThreadSelfTest, Basic) {
+ const pid_t tgid = getpid();
+ const pid_t tid = syscall(SYS_gettid);
+ EXPECT_EQ(tgid, tid);
+ auto link_threadself =
+ ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/thread-self"));
+ EXPECT_EQ(link_threadself, absl::StrCat(tgid, "/task/", tid));
+ // Just read one file inside thread-self to ensure that the link is valid.
+ auto link_threadself_exe =
+ ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/thread-self/exe"));
+ auto link_procself_exe =
+ ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self/exe"));
+ EXPECT_EQ(link_threadself_exe, link_procself_exe);
+}
+
+TEST(ProcThreadSelfTest, Thread) {
+ ScopedThread([] {
+ const pid_t tgid = getpid();
+ const pid_t tid = syscall(SYS_gettid);
+ EXPECT_NE(tgid, tid);
+ auto link_threadself =
+ ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/thread-self"));
+
+ EXPECT_EQ(link_threadself, absl::StrCat(tgid, "/task/", tid));
+ // Just read one file inside thread-self to ensure that the link is valid.
+ auto link_threadself_exe =
+ ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/thread-self/exe"));
+ auto link_procself_exe =
+ ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self/exe"));
+ EXPECT_EQ(link_threadself_exe, link_procself_exe);
+ // A thread should not have "/proc/<tid>/task".
+ struct stat s;
+ EXPECT_THAT(stat("/proc/thread-self/task", &s),
+ SyscallFailsWithErrno(ENOENT));
+ });
+}
+
+// Returns the /proc/PID/maps entry for the MAP_PRIVATE | MAP_ANONYMOUS mapping
+// m with start address addr and length len.
+std::string AnonymousMapsEntry(uintptr_t addr, size_t len, int prot) {
+ return absl::StrCat(absl::Hex(addr, absl::PadSpec::kZeroPad8), "-",
+ absl::Hex(addr + len, absl::PadSpec::kZeroPad8), " ",
+ prot & PROT_READ ? "r" : "-",
+ prot & PROT_WRITE ? "w" : "-",
+ prot & PROT_EXEC ? "x" : "-", "p 00000000 00:00 0 ");
+}
+
+std::string AnonymousMapsEntryForMapping(const Mapping& m, int prot) {
+ return AnonymousMapsEntry(m.addr(), m.len(), prot);
+}
+
+PosixErrorOr<std::map<uint64_t, uint64_t>> ReadProcSelfAuxv() {
+ std::string auxv_file;
+ RETURN_IF_ERRNO(GetContents("/proc/self/auxv", &auxv_file));
+ const Elf64_auxv_t* auxv_data =
+ reinterpret_cast<const Elf64_auxv_t*>(auxv_file.data());
+ std::map<uint64_t, uint64_t> auxv_entries;
+ for (int i = 0; auxv_data[i].a_type != AT_NULL; i++) {
+ auto a_type = auxv_data[i].a_type;
+ EXPECT_EQ(0, auxv_entries.count(a_type)) << "a_type: " << a_type;
+ auxv_entries.emplace(a_type, auxv_data[i].a_un.a_val);
+ }
+ return auxv_entries;
+}
+
+TEST(ProcSelfAuxv, EntryPresence) {
+ auto auxv_entries = ASSERT_NO_ERRNO_AND_VALUE(ReadProcSelfAuxv());
+
+ EXPECT_EQ(auxv_entries.count(AT_ENTRY), 1);
+ EXPECT_EQ(auxv_entries.count(AT_PHDR), 1);
+ EXPECT_EQ(auxv_entries.count(AT_PHENT), 1);
+ EXPECT_EQ(auxv_entries.count(AT_PHNUM), 1);
+ EXPECT_EQ(auxv_entries.count(AT_BASE), 1);
+ EXPECT_EQ(auxv_entries.count(AT_CLKTCK), 1);
+ EXPECT_EQ(auxv_entries.count(AT_RANDOM), 1);
+ EXPECT_EQ(auxv_entries.count(AT_EXECFN), 1);
+ EXPECT_EQ(auxv_entries.count(AT_PAGESZ), 1);
+ EXPECT_EQ(auxv_entries.count(AT_SYSINFO_EHDR), 1);
+}
+
+TEST(ProcSelfAuxv, EntryValues) {
+ auto proc_auxv = ASSERT_NO_ERRNO_AND_VALUE(ReadProcSelfAuxv());
+
+ // We need to find the ELF auxiliary vector. The section of memory pointed to
+ // by envp contains some pointers to non-null pointers, followed by a single
+ // pointer to a null pointer, followed by the auxiliary vector.
+ char** envpi = environ;
+ while (*envpi) {
+ ++envpi;
+ }
+
+ const Elf64_auxv_t* envp_auxv =
+ reinterpret_cast<const Elf64_auxv_t*>(envpi + 1);
+ int i;
+ for (i = 0; envp_auxv[i].a_type != AT_NULL; i++) {
+ auto a_type = envp_auxv[i].a_type;
+ EXPECT_EQ(proc_auxv.count(a_type), 1);
+ EXPECT_EQ(proc_auxv[a_type], envp_auxv[i].a_un.a_val)
+ << "a_type: " << a_type;
+ }
+ EXPECT_EQ(i, proc_auxv.size());
+}
+
+// Just open and read /proc/self/maps, check that we can find [stack]
+TEST(ProcSelfMaps, Basic) {
+ auto proc_self_maps =
+ ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps"));
+
+ std::vector<std::string> strings = absl::StrSplit(proc_self_maps, '\n');
+ std::vector<std::string> stacks;
+ // Make sure there's a stack in there.
+ for (const auto& str : strings) {
+ if (str.find("[stack]") != std::string::npos) {
+ stacks.push_back(str);
+ }
+ }
+ ASSERT_EQ(1, stacks.size()) << "[stack] not found in: " << proc_self_maps;
+ // Linux pads to 73 characters then we add 7.
+ EXPECT_EQ(80, stacks[0].length());
+}
+
+TEST(ProcSelfMaps, Map1) {
+ Mapping mapping =
+ ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_READ, MAP_PRIVATE));
+ auto proc_self_maps =
+ ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps"));
+ std::vector<std::string> strings = absl::StrSplit(proc_self_maps, '\n');
+ std::vector<std::string> addrs;
+ // Make sure if is listed.
+ for (const auto& str : strings) {
+ if (str == AnonymousMapsEntryForMapping(mapping, PROT_READ)) {
+ addrs.push_back(str);
+ }
+ }
+ ASSERT_EQ(1, addrs.size());
+}
+
+TEST(ProcSelfMaps, Map2) {
+ // NOTE: The permissions must be different or the pages will get merged.
+ Mapping map1 = ASSERT_NO_ERRNO_AND_VALUE(
+ MmapAnon(kPageSize, PROT_READ | PROT_EXEC, MAP_PRIVATE));
+ Mapping map2 =
+ ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_WRITE, MAP_PRIVATE));
+
+ auto proc_self_maps =
+ ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps"));
+ std::vector<std::string> strings = absl::StrSplit(proc_self_maps, '\n');
+ std::vector<std::string> addrs;
+ // Make sure if is listed.
+ for (const auto& str : strings) {
+ if (str == AnonymousMapsEntryForMapping(map1, PROT_READ | PROT_EXEC)) {
+ addrs.push_back(str);
+ }
+ }
+ ASSERT_EQ(1, addrs.size());
+ addrs.clear();
+ for (const auto& str : strings) {
+ if (str == AnonymousMapsEntryForMapping(map2, PROT_WRITE)) {
+ addrs.push_back(str);
+ }
+ }
+ ASSERT_EQ(1, addrs.size());
+}
+
+TEST(ProcSelfMaps, MapUnmap) {
+ Mapping map1 = ASSERT_NO_ERRNO_AND_VALUE(
+ MmapAnon(kPageSize, PROT_READ | PROT_EXEC, MAP_PRIVATE));
+ Mapping map2 =
+ ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_WRITE, MAP_PRIVATE));
+
+ auto proc_self_maps =
+ ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps"));
+ std::vector<std::string> strings = absl::StrSplit(proc_self_maps, '\n');
+ std::vector<std::string> addrs;
+ // Make sure if is listed.
+ for (const auto& str : strings) {
+ if (str == AnonymousMapsEntryForMapping(map1, PROT_READ | PROT_EXEC)) {
+ addrs.push_back(str);
+ }
+ }
+ ASSERT_EQ(1, addrs.size()) << proc_self_maps;
+ addrs.clear();
+ for (const auto& str : strings) {
+ if (str == AnonymousMapsEntryForMapping(map2, PROT_WRITE)) {
+ addrs.push_back(str);
+ }
+ }
+ ASSERT_EQ(1, addrs.size());
+
+ map2.reset();
+
+ // Read it again.
+ proc_self_maps = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps"));
+ strings = absl::StrSplit(proc_self_maps, '\n');
+ // First entry should be there.
+ addrs.clear();
+ for (const auto& str : strings) {
+ if (str == AnonymousMapsEntryForMapping(map1, PROT_READ | PROT_EXEC)) {
+ addrs.push_back(str);
+ }
+ }
+ ASSERT_EQ(1, addrs.size());
+ addrs.clear();
+ // But not the second.
+ for (const auto& str : strings) {
+ if (str == AnonymousMapsEntryForMapping(map2, PROT_WRITE)) {
+ addrs.push_back(str);
+ }
+ }
+ ASSERT_EQ(0, addrs.size());
+}
+
+TEST(ProcSelfMaps, Mprotect) {
+ if (!IsRunningOnGvisor()) {
+ // FIXME: Linux's mprotect() sometimes fails to merge VMAs in this
+ // case.
+ LOG(WARNING) << "Skipping test on Linux";
+ return;
+ }
+
+ // Reserve 5 pages of address space.
+ Mapping m = ASSERT_NO_ERRNO_AND_VALUE(
+ MmapAnon(5 * kPageSize, PROT_NONE, MAP_PRIVATE));
+
+ // Change the permissions on the middle 3 pages. (The first and last pages may
+ // be merged with other vmas on either side, so they aren't tested directly;
+ // they just ensure that the middle 3 pages are bracketed by VMAs with
+ // incompatible permissions.)
+ ASSERT_THAT(mprotect(reinterpret_cast<void*>(m.addr() + kPageSize),
+ 3 * kPageSize, PROT_READ),
+ SyscallSucceeds());
+
+ // Check that the middle 3 pages make up a single VMA.
+ auto proc_self_maps =
+ ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps"));
+ std::vector<std::string> strings = absl::StrSplit(proc_self_maps, '\n');
+ EXPECT_THAT(strings, Contains(AnonymousMapsEntry(m.addr() + kPageSize,
+ 3 * kPageSize, PROT_READ)));
+
+ // Change the permissions on the middle page only.
+ ASSERT_THAT(mprotect(reinterpret_cast<void*>(m.addr() + 2 * kPageSize),
+ kPageSize, PROT_READ | PROT_WRITE),
+ SyscallSucceeds());
+
+ // Check that the single VMA has been split into 3 VMAs.
+ proc_self_maps = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps"));
+ strings = absl::StrSplit(proc_self_maps, '\n');
+ EXPECT_THAT(
+ strings,
+ IsSupersetOf(
+ {AnonymousMapsEntry(m.addr() + kPageSize, kPageSize, PROT_READ),
+ AnonymousMapsEntry(m.addr() + 2 * kPageSize, kPageSize,
+ PROT_READ | PROT_WRITE),
+ AnonymousMapsEntry(m.addr() + 3 * kPageSize, kPageSize,
+ PROT_READ)}));
+
+ // Change the permissions on the middle page back.
+ ASSERT_THAT(mprotect(reinterpret_cast<void*>(m.addr() + 2 * kPageSize),
+ kPageSize, PROT_READ),
+ SyscallSucceeds());
+
+ // Check that the 3 VMAs have been merged back into a single VMA.
+ proc_self_maps = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps"));
+ strings = absl::StrSplit(proc_self_maps, '\n');
+ EXPECT_THAT(strings, Contains(AnonymousMapsEntry(m.addr() + kPageSize,
+ 3 * kPageSize, PROT_READ)));
+}
+
+TEST(ProcSelfFd, OpenFd) {
+ int pipe_fds[2];
+ ASSERT_THAT(pipe2(pipe_fds, O_CLOEXEC), SyscallSucceeds());
+
+ // Reopen the write end.
+ const std::string path = absl::StrCat("/proc/self/fd/", pipe_fds[1]);
+ const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, O_WRONLY));
+
+ // Ensure that a read/write works.
+ const std::string data = "hello";
+ std::unique_ptr<char[]> buffer(new char[data.size()]);
+ EXPECT_THAT(write(fd.get(), data.c_str(), data.size()),
+ SyscallSucceedsWithValue(5));
+ EXPECT_THAT(read(pipe_fds[0], buffer.get(), data.size()),
+ SyscallSucceedsWithValue(5));
+ EXPECT_EQ(strncmp(buffer.get(), data.c_str(), data.size()), 0);
+
+ // Cleanup.
+ ASSERT_THAT(close(pipe_fds[0]), SyscallSucceeds());
+ ASSERT_THAT(close(pipe_fds[1]), SyscallSucceeds());
+}
+
+TEST(ProcSelfFdInfo, CorrectFds) {
+ // Make sure there is at least one open file.
+ auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDONLY));
+
+ // Get files in /proc/self/fd.
+ auto fd_files = ASSERT_NO_ERRNO_AND_VALUE(ListDir("/proc/self/fd", false));
+
+ // Get files in /proc/self/fdinfo.
+ auto fdinfo_files =
+ ASSERT_NO_ERRNO_AND_VALUE(ListDir("/proc/self/fdinfo", false));
+
+ // They should contain the same fds.
+ EXPECT_THAT(fd_files, UnorderedElementsAreArray(fdinfo_files));
+
+ // Both should contain fd.
+ auto fd_s = absl::StrCat(fd.get());
+ EXPECT_THAT(fd_files, Contains(fd_s));
+}
+
+TEST(ProcSelfFdInfo, Flags) {
+ std::string path = NewTempAbsPath();
+
+ // Create file here with O_CREAT to test that O_CREAT does not appear in
+ // fdinfo flags.
+ int flags = O_CREAT | O_RDWR | O_APPEND | O_CLOEXEC;
+ const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, flags, 0644));
+
+ // Automatically delete path.
+ TempPath temp_path(path);
+
+ // O_CREAT does not appear in fdinfo flags.
+ flags &= ~O_CREAT;
+
+ // O_LARGEFILE always appears (on x86_64).
+ flags |= kOLargeFile;
+
+ auto fd_info = ASSERT_NO_ERRNO_AND_VALUE(
+ GetContents(absl::StrCat("/proc/self/fdinfo/", fd.get())));
+ EXPECT_THAT(fd_info, HasSubstr(absl::StrFormat("flags:\t%#o", flags)));
+}
+
+TEST(ProcSelfExe, Absolute) {
+ auto exe = ASSERT_NO_ERRNO_AND_VALUE(
+ ReadLink(absl::StrCat("/proc/", getpid(), "/exe")));
+ EXPECT_EQ(exe[0], '/');
+}
+
+// Sanity check for /proc/cpuinfo fields that must be present.
+TEST(ProcCpuinfo, RequiredFieldsArePresent) {
+ std::string proc_cpuinfo = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/cpuinfo"));
+ ASSERT_FALSE(proc_cpuinfo.empty());
+ std::vector<std::string> cpuinfo_fields = absl::StrSplit(proc_cpuinfo, '\n');
+
+ // This list of "required" fields is taken from reading the file
+ // arch/x86/kernel/cpu/proc.c and seeing which fields will be unconditionally
+ // printed by the kernel.
+ static const char* required_fields[] = {
+ "processor",
+ "vendor_id",
+ "cpu family",
+ "model\t\t:",
+ "model name",
+ "stepping",
+ "cpu MHz",
+ "fpu\t\t:",
+ "fpu_exception",
+ "cpuid level",
+ "wp",
+ "bogomips",
+ "clflush size",
+ "cache_alignment",
+ "address sizes",
+ "power management",
+ };
+
+ // Check that the usual fields are there. We don't really care about the
+ // contents.
+ for (const std::string& field : required_fields) {
+ EXPECT_THAT(proc_cpuinfo, HasSubstr(field));
+ }
+}
+
+// Sanity checks that uptime is present.
+TEST(ProcUptime, IsPresent) {
+ std::string proc_uptime = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/uptime"));
+ ASSERT_FALSE(proc_uptime.empty());
+ std::vector<std::string> uptime_parts = absl::StrSplit(proc_uptime, ' ');
+
+ // Parse once.
+ double uptime0, uptime1, idletime0, idletime1;
+ ASSERT_TRUE(absl::SimpleAtod(uptime_parts[0], &uptime0));
+ ASSERT_TRUE(absl::SimpleAtod(uptime_parts[1], &idletime0));
+
+ // Sleep for one second.
+ absl::SleepFor(absl::Seconds(1));
+
+ // Parse again.
+ proc_uptime = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/uptime"));
+ ASSERT_FALSE(proc_uptime.empty());
+ uptime_parts = absl::StrSplit(proc_uptime, ' ');
+ ASSERT_TRUE(absl::SimpleAtod(uptime_parts[0], &uptime1));
+ ASSERT_TRUE(absl::SimpleAtod(uptime_parts[1], &idletime1));
+
+ // Sanity check.
+ //
+ // We assert that between 0.99 and 59.99 seconds have passed. If more than a
+ // minute has passed, then we must be executing really, really slowly.
+ EXPECT_GE(uptime0, 0.0);
+ EXPECT_GE(idletime0, 0.0);
+ EXPECT_GT(uptime1, uptime0);
+ EXPECT_GE(uptime1, uptime0 + 0.99);
+ EXPECT_LE(uptime1, uptime0 + 59.99);
+ EXPECT_GE(idletime1, idletime0);
+}
+
+TEST(ProcMeminfo, ContainsBasicFields) {
+ std::string proc_meminfo = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/meminfo"));
+ EXPECT_THAT(proc_meminfo, AllOf(ContainsRegex(R"(MemTotal:\s+[0-9]+ kB)"),
+ ContainsRegex(R"(MemFree:\s+[0-9]+ kB)")));
+}
+
+TEST(ProcStat, ContainsBasicFields) {
+ std::string proc_stat = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/stat"));
+
+ std::vector<std::string> names;
+ for (auto const& line : absl::StrSplit(proc_stat, '\n')) {
+ std::vector<std::string> fields =
+ absl::StrSplit(line, ' ', absl::SkipWhitespace());
+ if (fields.empty()) {
+ continue;
+ }
+ names.push_back(fields[0]);
+ }
+
+ EXPECT_THAT(names,
+ IsSupersetOf({"cpu", "intr", "ctxt", "btime", "processes",
+ "procs_running", "procs_blocked", "softirq"}));
+}
+
+TEST(ProcStat, EndsWithNewline) {
+ std::string proc_stat = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/stat"));
+ EXPECT_EQ(proc_stat.back(), '\n');
+}
+
+TEST(ProcStat, Fields) {
+ std::string proc_stat = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/stat"));
+
+ std::vector<std::string> names;
+ for (auto const& line : absl::StrSplit(proc_stat, '\n')) {
+ std::vector<std::string> fields =
+ absl::StrSplit(line, ' ', absl::SkipWhitespace());
+ if (fields.empty()) {
+ continue;
+ }
+
+ if (absl::StartsWith(fields[0], "cpu")) {
+ // As of Linux 3.11, each CPU entry has 10 fields, plus the name.
+ EXPECT_GE(fields.size(), 11) << proc_stat;
+ } else if (fields[0] == "ctxt") {
+ // Single field.
+ EXPECT_EQ(fields.size(), 2) << proc_stat;
+ } else if (fields[0] == "btime") {
+ // Single field.
+ EXPECT_EQ(fields.size(), 2) << proc_stat;
+ } else if (fields[0] == "itime") {
+ // Single field.
+ ASSERT_EQ(fields.size(), 2) << proc_stat;
+ // This is the only floating point field.
+ double val;
+ EXPECT_TRUE(absl::SimpleAtod(fields[1], &val)) << proc_stat;
+ continue;
+ } else if (fields[0] == "processes") {
+ // Single field.
+ EXPECT_EQ(fields.size(), 2) << proc_stat;
+ } else if (fields[0] == "procs_running") {
+ // Single field.
+ EXPECT_EQ(fields.size(), 2) << proc_stat;
+ } else if (fields[0] == "procs_blocked") {
+ // Single field.
+ EXPECT_EQ(fields.size(), 2) << proc_stat;
+ } else if (fields[0] == "softirq") {
+ // As of Linux 3.11, there are 10 softirqs. 12 fields for name + total.
+ EXPECT_GE(fields.size(), 12) << proc_stat;
+ }
+
+ // All fields besides itime are valid base 10 numbers.
+ for (size_t i = 1; i < fields.size(); i++) {
+ uint64_t val;
+ EXPECT_TRUE(absl::SimpleAtoi(fields[i], &val)) << proc_stat;
+ }
+ }
+}
+
+TEST(ProcLoadavg, EndsWithNewline) {
+ std::string proc_loadvg = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/loadavg"));
+ EXPECT_EQ(proc_loadvg.back(), '\n');
+}
+
+TEST(ProcLoadavg, Fields) {
+ std::string proc_loadvg = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/loadavg"));
+ std::vector<std::string> lines = absl::StrSplit(proc_loadvg, '\n');
+
+ // Single line.
+ EXPECT_EQ(lines.size(), 2) << proc_loadvg;
+
+ std::vector<std::string> fields =
+ absl::StrSplit(lines[0], absl::ByAnyChar(" /"), absl::SkipWhitespace());
+
+ // Six fields.
+ EXPECT_EQ(fields.size(), 6) << proc_loadvg;
+
+ double val;
+ uint64_t val2;
+ // First three fields are floating point numbers.
+ EXPECT_TRUE(absl::SimpleAtod(fields[0], &val)) << proc_loadvg;
+ EXPECT_TRUE(absl::SimpleAtod(fields[1], &val)) << proc_loadvg;
+ EXPECT_TRUE(absl::SimpleAtod(fields[2], &val)) << proc_loadvg;
+ // Rest of the fields are valid base 10 numbers.
+ EXPECT_TRUE(absl::SimpleAtoi(fields[3], &val2)) << proc_loadvg;
+ EXPECT_TRUE(absl::SimpleAtoi(fields[4], &val2)) << proc_loadvg;
+ EXPECT_TRUE(absl::SimpleAtoi(fields[5], &val2)) << proc_loadvg;
+}
+
+// NOTE: Tests in priority.cc also check certain priority related fields in
+// /proc/self/stat.
+
+class ProcPidStatTest : public ::testing::TestWithParam<std::string> {};
+
+TEST_P(ProcPidStatTest, HasBasicFields) {
+ std::string proc_pid_stat = ASSERT_NO_ERRNO_AND_VALUE(
+ GetContents(absl::StrCat("/proc/", GetParam(), "/stat")));
+
+ ASSERT_FALSE(proc_pid_stat.empty());
+ std::vector<std::string> fields = absl::StrSplit(proc_pid_stat, ' ');
+ ASSERT_GE(fields.size(), 24);
+ EXPECT_EQ(absl::StrCat(getpid()), fields[0]);
+ // fields[1] is the thread name.
+ EXPECT_EQ("R", fields[2]); // task state
+ EXPECT_EQ(absl::StrCat(getppid()), fields[3]);
+
+ uint64_t vss;
+ ASSERT_TRUE(absl::SimpleAtoi(fields[22], &vss));
+ EXPECT_GT(vss, 0);
+
+ uint64_t rss;
+ ASSERT_TRUE(absl::SimpleAtoi(fields[23], &rss));
+ EXPECT_GT(rss, 0);
+}
+
+INSTANTIATE_TEST_CASE_P(SelfAndNumericPid, ProcPidStatTest,
+ ::testing::Values("self", absl::StrCat(getpid())));
+
+using ProcPidStatmTest = ::testing::TestWithParam<std::string>;
+
+TEST_P(ProcPidStatmTest, HasBasicFields) {
+ std::string proc_pid_statm = ASSERT_NO_ERRNO_AND_VALUE(
+ GetContents(absl::StrCat("/proc/", GetParam(), "/statm")));
+ ASSERT_FALSE(proc_pid_statm.empty());
+ std::vector<std::string> fields = absl::StrSplit(proc_pid_statm, ' ');
+ ASSERT_GE(fields.size(), 7);
+
+ uint64_t vss;
+ ASSERT_TRUE(absl::SimpleAtoi(fields[0], &vss));
+ EXPECT_GT(vss, 0);
+
+ uint64_t rss;
+ ASSERT_TRUE(absl::SimpleAtoi(fields[1], &rss));
+ EXPECT_GT(rss, 0);
+}
+
+INSTANTIATE_TEST_CASE_P(SelfAndNumericPid, ProcPidStatmTest,
+ ::testing::Values("self", absl::StrCat(getpid())));
+
+PosixErrorOr<uint64_t> CurrentRSS() {
+ ASSIGN_OR_RETURN_ERRNO(auto proc_self_stat, GetContents("/proc/self/stat"));
+ if (proc_self_stat.empty()) {
+ return PosixError(EINVAL, "empty /proc/self/stat");
+ }
+
+ std::vector<std::string> fields = absl::StrSplit(proc_self_stat, ' ');
+ if (fields.size() < 24) {
+ return PosixError(
+ EINVAL,
+ absl::StrCat("/proc/self/stat has too few fields: ", proc_self_stat));
+ }
+
+ uint64_t rss;
+ if (!absl::SimpleAtoi(fields[23], &rss)) {
+ return PosixError(
+ EINVAL, absl::StrCat("/proc/self/stat RSS field is not a number: ",
+ fields[23]));
+ }
+
+ // RSS is given in number of pages.
+ return rss * kPageSize;
+}
+
+// The size of mapping created by MapPopulateRSS.
+constexpr uint64_t kMappingSize = 100 << 20;
+
+// Tolerance on RSS comparisons to account for background thread mappings,
+// reclaimed pages, newly faulted pages, etc.
+constexpr uint64_t kRSSTolerance = 5 << 20;
+
+// Capture RSS before and after an anonymous mapping with passed prot.
+void MapPopulateRSS(int prot, uint64_t* before, uint64_t* after) {
+ *before = ASSERT_NO_ERRNO_AND_VALUE(CurrentRSS());
+
+ // N.B. The kernel asynchronously accumulates per-task RSS counters into the
+ // mm RSS, which is exposed by /proc/PID/stat. Task exit is a synchronization
+ // point (kernel/exit.c:do_exit -> sync_mm_rss), so perform the mapping on
+ // another thread to ensure it is reflected in RSS after the thread exits.
+ Mapping mapping;
+ ScopedThread t([&mapping, prot] {
+ mapping = ASSERT_NO_ERRNO_AND_VALUE(
+ MmapAnon(kMappingSize, prot, MAP_PRIVATE | MAP_POPULATE));
+ });
+ t.Join();
+
+ *after = ASSERT_NO_ERRNO_AND_VALUE(CurrentRSS());
+}
+
+// TODO: Test for PROT_READ + MAP_POPULATE anonymous mappings. Their
+// semantics are more subtle:
+//
+// Small pages -> Zero page mapped, not counted in RSS
+// (mm/memory.c:do_anonymous_page).
+//
+// Huge pages (THP enabled, use_zero_page=0) -> Pages committed
+// (mm/memory.c:__handle_mm_fault -> create_huge_pmd).
+//
+// Huge pages (THP enabled, use_zero_page=1) -> Zero page mapped, not counted in
+// RSS (mm/huge_memory.c:do_huge_pmd_anonymous_page).
+
+// PROT_WRITE + MAP_POPULATE anonymous mappings are always committed.
+TEST(ProcSelfStat, PopulateWriteRSS) {
+ uint64_t before, after;
+ MapPopulateRSS(PROT_READ | PROT_WRITE, &before, &after);
+
+ // Mapping is committed.
+ EXPECT_NEAR(before + kMappingSize, after, kRSSTolerance);
+}
+
+// PROT_NONE + MAP_POPULATE anonymous mappings are never committed.
+TEST(ProcSelfStat, PopulateNoneRSS) {
+ uint64_t before, after;
+ MapPopulateRSS(PROT_NONE, &before, &after);
+
+ // Mapping not committed.
+ EXPECT_NEAR(before, after, kRSSTolerance);
+}
+
+// Returns the calling thread's name.
+PosixErrorOr<std::string> ThreadName() {
+ // "The buffer should allow space for up to 16 bytes; the returned std::string
+ // will be null-terminated if it is shorter than that." - prctl(2). But we
+ // always want the thread name to be null-terminated.
+ char thread_name[17];
+ int rc = prctl(PR_GET_NAME, thread_name, 0, 0, 0);
+ MaybeSave();
+ if (rc < 0) {
+ return PosixError(errno, "prctl(PR_GET_NAME)");
+ }
+ thread_name[16] = '\0';
+ return std::string(thread_name);
+}
+
+// Parses the contents of a /proc/[pid]/status file into a collection of
+// key-value pairs.
+PosixErrorOr<std::map<std::string, std::string>> ParseProcStatus(
+ absl::string_view status_str) {
+ std::map<std::string, std::string> fields;
+ for (absl::string_view const line :
+ absl::StrSplit(status_str, '\n', absl::SkipWhitespace())) {
+ const std::pair<absl::string_view, absl::string_view> kv =
+ absl::StrSplit(line, absl::MaxSplits(":\t", 1));
+ if (kv.first.empty()) {
+ return PosixError(
+ EINVAL, absl::StrCat("failed to parse key in line \"", line, "\""));
+ }
+ std::string key(kv.first);
+ if (fields.count(key)) {
+ return PosixError(EINVAL,
+ absl::StrCat("duplicate key \"", kv.first, "\""));
+ }
+ std::string value(kv.second);
+ absl::StripLeadingAsciiWhitespace(&value);
+ fields.emplace(std::move(key), std::move(value));
+ }
+ return fields;
+}
+
+TEST(ParseProcStatusTest, ParsesSimpleStatusFileWithMixedWhitespaceCorrectly) {
+ EXPECT_THAT(
+ ParseProcStatus(
+ "Name:\tinit\nState:\tS (sleeping)\nCapEff:\t 0000001fffffffff\n"),
+ IsPosixErrorOkAndHolds(UnorderedElementsAre(
+ Pair("Name", "init"), Pair("State", "S (sleeping)"),
+ Pair("CapEff", "0000001fffffffff"))));
+}
+
+TEST(ParseProcStatusTest, DetectsDuplicateKeys) {
+ auto proc_status_or = ParseProcStatus("Name:\tfoo\nName:\tfoo\n");
+ EXPECT_THAT(proc_status_or,
+ PosixErrorIs(EINVAL, ::testing::StrEq("duplicate key \"Name\"")));
+}
+
+TEST(ParseProcStatusTest, DetectsMissingTabs) {
+ EXPECT_THAT(ParseProcStatus("Name:foo\nPid: 1\n"),
+ IsPosixErrorOkAndHolds(UnorderedElementsAre(Pair("Name:foo", ""),
+ Pair("Pid: 1", ""))));
+}
+
+TEST(ProcPidStatusTest, HasBasicFields) {
+ // Do this on a separate thread since we want tgid != tid.
+ ScopedThread([] {
+ const pid_t tgid = getpid();
+ const pid_t tid = syscall(SYS_gettid);
+ EXPECT_NE(tgid, tid);
+ const auto thread_name = ASSERT_NO_ERRNO_AND_VALUE(ThreadName());
+
+ std::string status_str = ASSERT_NO_ERRNO_AND_VALUE(
+ GetContents(absl::StrCat("/proc/", tid, "/status")));
+
+ ASSERT_FALSE(status_str.empty());
+ const auto status = ASSERT_NO_ERRNO_AND_VALUE(ParseProcStatus(status_str));
+ EXPECT_THAT(status, IsSupersetOf({Pair("Name", thread_name),
+ Pair("Tgid", absl::StrCat(tgid)),
+ Pair("Pid", absl::StrCat(tid)),
+ Pair("PPid", absl::StrCat(getppid()))}));
+ });
+}
+
+TEST(ProcPidStatusTest, StateRunning) {
+ // Task must be running when reading the file.
+ const pid_t tid = syscall(SYS_gettid);
+ std::string status_str = ASSERT_NO_ERRNO_AND_VALUE(
+ GetContents(absl::StrCat("/proc/", tid, "/status")));
+
+ EXPECT_THAT(ParseProcStatus(status_str),
+ IsPosixErrorOkAndHolds(Contains(Pair("State", "R (running)"))));
+}
+
+TEST(ProcPidStatusTest, StateSleeping_NoRandomSave) {
+ // Starts a child process that blocks and checks that State is sleeping.
+ auto res = WithSubprocess(
+ [&](int pid) -> PosixError {
+ // Because this test is timing based we will disable cooperative saving
+ // and the test itself also has random saving disabled.
+ const DisableSave ds;
+ // Try multiple times in case the child isn't sleeping when status file
+ // is read.
+ MonotonicTimer timer;
+ timer.Start();
+ for (;;) {
+ ASSIGN_OR_RETURN_ERRNO(
+ std::string status_str,
+ GetContents(absl::StrCat("/proc/", pid, "/status")));
+ ASSIGN_OR_RETURN_ERRNO(auto map, ParseProcStatus(status_str));
+ if (map["State"] == std::string("S (sleeping)")) {
+ // Test passed!
+ return NoError();
+ }
+ if (timer.Duration() > absl::Seconds(10)) {
+ return PosixError(ETIMEDOUT, "Timeout waiting for child to sleep");
+ }
+ absl::SleepFor(absl::Milliseconds(10));
+ }
+ },
+ nullptr, nullptr);
+ ASSERT_NO_ERRNO(res);
+}
+
+TEST(ProcPidStatusTest, ValuesAreTabDelimited) {
+ std::string status_str =
+ ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/status"));
+ ASSERT_FALSE(status_str.empty());
+ for (absl::string_view const line :
+ absl::StrSplit(status_str, '\n', absl::SkipWhitespace())) {
+ EXPECT_NE(std::string::npos, line.find(":\t"));
+ }
+}
+
+// Threads properly counts running threads.
+//
+// TODO: Test zombied threads while the thread group leader is still
+// running with generalized fork and clone children from the wait test.
+TEST(ProcPidStatusTest, Threads) {
+ char buf[4096] = {};
+ EXPECT_THAT(ReadWhileRunning("status", buf, sizeof(buf) - 1),
+ SyscallSucceedsWithValue(Gt(0)));
+
+ auto status = ASSERT_NO_ERRNO_AND_VALUE(ParseProcStatus(buf));
+ auto it = status.find("Threads");
+ ASSERT_NE(it, status.end());
+ int threads = -1;
+ EXPECT_TRUE(absl::SimpleAtoi(it->second, &threads))
+ << "Threads value " << it->second << " is not a number";
+ // Don't make assumptions about the exact number of threads, as it may not be
+ // constant.
+ EXPECT_GE(threads, 1);
+
+ memset(buf, 0, sizeof(buf));
+ EXPECT_THAT(ReadWhileZombied("status", buf, sizeof(buf) - 1),
+ SyscallSucceedsWithValue(Gt(0)));
+
+ status = ASSERT_NO_ERRNO_AND_VALUE(ParseProcStatus(buf));
+ it = status.find("Threads");
+ ASSERT_NE(it, status.end());
+ threads = -1;
+ EXPECT_TRUE(absl::SimpleAtoi(it->second, &threads))
+ << "Threads value " << it->second << " is not a number";
+ // There must be only the thread group leader remaining, zombied.
+ EXPECT_EQ(threads, 1);
+}
+
+// Returns true if all characters in s are digits.
+bool IsDigits(absl::string_view s) {
+ return std::all_of(s.begin(), s.end(), absl::ascii_isdigit);
+}
+
+TEST(ProcPidStatTest, VSSRSS) {
+ std::string status_str =
+ ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/status"));
+ ASSERT_FALSE(status_str.empty());
+ auto status = ASSERT_NO_ERRNO_AND_VALUE(ParseProcStatus(status_str));
+
+ const auto vss_it = status.find("VmSize");
+ ASSERT_NE(vss_it, status.end());
+
+ absl::string_view vss_str(vss_it->second);
+
+ // Room for the " kB" suffix plus at least one digit.
+ ASSERT_GT(vss_str.length(), 3);
+ EXPECT_TRUE(absl::EndsWith(vss_str, " kB"));
+ // Everything else is part of a number.
+ EXPECT_TRUE(IsDigits(vss_str.substr(0, vss_str.length() - 3))) << vss_str;
+ // ... which is not 0.
+ EXPECT_NE('0', vss_str[0]);
+
+ const auto rss_it = status.find("VmRSS");
+ ASSERT_NE(rss_it, status.end());
+
+ absl::string_view rss_str(rss_it->second);
+
+ // Room for the " kB" suffix plus at least one digit.
+ ASSERT_GT(rss_str.length(), 3);
+ EXPECT_TRUE(absl::EndsWith(rss_str, " kB"));
+ // Everything else is part of a number.
+ EXPECT_TRUE(IsDigits(rss_str.substr(0, rss_str.length() - 3))) << rss_str;
+ // ... which is not 0.
+ EXPECT_NE('0', rss_str[0]);
+}
+
+// Parse an array of NUL-terminated char* arrays, returning a vector of strings.
+std::vector<std::string> ParseNulTerminatedStrings(std::string contents) {
+ EXPECT_EQ('\0', contents.back());
+ // The split will leave an empty std::string if the NUL-byte remains, so pop it.
+ contents.pop_back();
+
+ return absl::StrSplit(contents, '\0');
+}
+
+TEST(ProcPidCmdline, MatchesArgv) {
+ std::vector<std::string> proc_cmdline = ParseNulTerminatedStrings(
+ ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/cmdline")));
+ EXPECT_THAT(saved_argv, ContainerEq(proc_cmdline));
+}
+
+TEST(ProcPidEnviron, MatchesEnviron) {
+ std::vector<std::string> proc_environ = ParseNulTerminatedStrings(
+ ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/environ")));
+ // Get the environment from the environ variable, which we will compare with
+ // /proc/self/environ.
+ std::vector<std::string> env;
+ for (char** v = environ; *v; v++) {
+ env.push_back(*v);
+ }
+ EXPECT_THAT(env, ContainerEq(proc_environ));
+}
+
+TEST(ProcPidCmdline, SubprocessForkSameCmdline) {
+ std::vector<std::string> proc_cmdline_parent;
+ std::vector<std::string> proc_cmdline;
+ proc_cmdline_parent = ParseNulTerminatedStrings(
+ ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/cmdline")));
+ auto res = WithSubprocess(
+ [&](int pid) -> PosixError {
+ ASSIGN_OR_RETURN_ERRNO(
+ auto raw_cmdline,
+ GetContents(absl::StrCat("/proc/", pid, "/cmdline")));
+ proc_cmdline = ParseNulTerminatedStrings(raw_cmdline);
+ return NoError();
+ },
+ nullptr, nullptr);
+ ASSERT_NO_ERRNO(res);
+
+ for (size_t i = 0; i < proc_cmdline_parent.size(); i++) {
+ EXPECT_EQ(proc_cmdline_parent[i], proc_cmdline[i]);
+ }
+}
+
+// Test whether /proc/PID/ symlinks can be read for a running process.
+TEST(ProcPidSymlink, SubprocessRunning) {
+ char buf[1];
+
+ EXPECT_THAT(ReadlinkWhileRunning("exe", buf, sizeof(buf)),
+ SyscallSucceedsWithValue(sizeof(buf)));
+
+ EXPECT_THAT(ReadlinkWhileRunning("ns/net", buf, sizeof(buf)),
+ SyscallSucceedsWithValue(sizeof(buf)));
+
+ EXPECT_THAT(ReadlinkWhileRunning("ns/pid", buf, sizeof(buf)),
+ SyscallSucceedsWithValue(sizeof(buf)));
+
+ EXPECT_THAT(ReadlinkWhileRunning("ns/user", buf, sizeof(buf)),
+ SyscallSucceedsWithValue(sizeof(buf)));
+}
+// FIXME: Inconsistent behavior between gVisor and linux
+// on proc files.
+TEST(ProcPidSymlink, SubprocessZombied) {
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+ char buf[1];
+
+ int want = EACCES;
+ if (!IsRunningOnGvisor()) {
+ auto version = ASSERT_NO_ERRNO_AND_VALUE(GetKernelVersion());
+ if (version.major == 4 && version.minor > 3) {
+ want = ENOENT;
+ }
+ }
+
+ EXPECT_THAT(ReadlinkWhileZombied("exe", buf, sizeof(buf)),
+ SyscallFailsWithErrno(want));
+
+ if (!IsRunningOnGvisor()) {
+ EXPECT_THAT(ReadlinkWhileZombied("ns/net", buf, sizeof(buf)),
+ SyscallFailsWithErrno(want));
+ }
+
+ // FIXME: Inconsistent behavior between gVisor and linux
+ // on proc files.
+ // 4.17 & gVisor: Syscall succeeds and returns 1
+ // EXPECT_THAT(ReadlinkWhileZombied("ns/pid", buf, sizeof(buf)),
+ // SyscallFailsWithErrno(EACCES));
+
+ // FIXME: Inconsistent behavior between gVisor and linux
+ // on proc files.
+ // 4.17 & gVisor: Syscall succeeds and returns 1.
+ // EXPECT_THAT(ReadlinkWhileZombied("ns/user", buf, sizeof(buf)),
+ // SyscallFailsWithErrno(EACCES));
+}
+
+// Test whether /proc/PID/ symlinks can be read for an exited process.
+TEST(ProcPidSymlink, SubprocessExited) {
+ // FIXME: These all succeed on gVisor.
+ SKIP_IF(IsRunningOnGvisor());
+
+ char buf[1];
+
+ EXPECT_THAT(ReadlinkWhileExited("exe", buf, sizeof(buf)),
+ SyscallFailsWithErrno(ESRCH));
+
+ EXPECT_THAT(ReadlinkWhileExited("ns/net", buf, sizeof(buf)),
+ SyscallFailsWithErrno(ESRCH));
+
+ EXPECT_THAT(ReadlinkWhileExited("ns/pid", buf, sizeof(buf)),
+ SyscallFailsWithErrno(ESRCH));
+
+ EXPECT_THAT(ReadlinkWhileExited("ns/user", buf, sizeof(buf)),
+ SyscallFailsWithErrno(ESRCH));
+}
+
+// /proc/PID/exe points to the correct binary.
+TEST(ProcPidExe, Subprocess) {
+ auto link = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self/exe"));
+ auto expected_absolute_path =
+ ASSERT_NO_ERRNO_AND_VALUE(MakeAbsolute(link, ""));
+
+ char actual[PATH_MAX + 1] = {};
+ ASSERT_THAT(ReadlinkWhileRunning("exe", actual, sizeof(actual)),
+ SyscallSucceedsWithValue(Gt(0)));
+ EXPECT_EQ(actual, expected_absolute_path);
+}
+
+// Test whether /proc/PID/ files can be read for a running process.
+TEST(ProcPidFile, SubprocessRunning) {
+ char buf[1];
+
+ EXPECT_THAT(ReadWhileRunning("auxv", buf, sizeof(buf)),
+ SyscallSucceedsWithValue(sizeof(buf)));
+
+ EXPECT_THAT(ReadWhileRunning("cmdline", buf, sizeof(buf)),
+ SyscallSucceedsWithValue(sizeof(buf)));
+
+ EXPECT_THAT(ReadWhileRunning("comm", buf, sizeof(buf)),
+ SyscallSucceedsWithValue(sizeof(buf)));
+
+ EXPECT_THAT(ReadWhileRunning("gid_map", buf, sizeof(buf)),
+ SyscallSucceedsWithValue(sizeof(buf)));
+
+ EXPECT_THAT(ReadWhileRunning("io", buf, sizeof(buf)),
+ SyscallSucceedsWithValue(sizeof(buf)));
+
+ EXPECT_THAT(ReadWhileRunning("maps", buf, sizeof(buf)),
+ SyscallSucceedsWithValue(sizeof(buf)));
+
+ EXPECT_THAT(ReadWhileRunning("stat", buf, sizeof(buf)),
+ SyscallSucceedsWithValue(sizeof(buf)));
+
+ EXPECT_THAT(ReadWhileRunning("status", buf, sizeof(buf)),
+ SyscallSucceedsWithValue(sizeof(buf)));
+
+ EXPECT_THAT(ReadWhileRunning("uid_map", buf, sizeof(buf)),
+ SyscallSucceedsWithValue(sizeof(buf)));
+}
+
+// Test whether /proc/PID/ files can be read for a zombie process.
+TEST(ProcPidFile, SubprocessZombie) {
+ char buf[1];
+ // 4.17: Succeeds and returns 1
+ // gVisor: Succeds and returns 0
+ EXPECT_THAT(ReadWhileZombied("auxv", buf, sizeof(buf)), SyscallSucceeds());
+
+ EXPECT_THAT(ReadWhileZombied("cmdline", buf, sizeof(buf)),
+ SyscallSucceedsWithValue(0));
+
+ EXPECT_THAT(ReadWhileZombied("comm", buf, sizeof(buf)),
+ SyscallSucceedsWithValue(sizeof(buf)));
+
+ EXPECT_THAT(ReadWhileZombied("gid_map", buf, sizeof(buf)),
+ SyscallSucceedsWithValue(sizeof(buf)));
+
+ EXPECT_THAT(ReadWhileZombied("maps", buf, sizeof(buf)),
+ SyscallSucceedsWithValue(0));
+
+ EXPECT_THAT(ReadWhileZombied("stat", buf, sizeof(buf)),
+ SyscallSucceedsWithValue(sizeof(buf)));
+
+ EXPECT_THAT(ReadWhileZombied("status", buf, sizeof(buf)),
+ SyscallSucceedsWithValue(sizeof(buf)));
+
+ EXPECT_THAT(ReadWhileZombied("uid_map", buf, sizeof(buf)),
+ SyscallSucceedsWithValue(sizeof(buf)));
+
+ // FIXME: Inconsistent behavior between gVisor and linux
+ // on proc files.
+ // gVisor & 4.17: Succeeds and returns 1.
+ // EXPECT_THAT(ReadWhileZombied("io", buf, sizeof(buf)),
+ // SyscallFailsWithErrno(EACCES));
+}
+
+// Test whether /proc/PID/ files can be read for an exited process.
+TEST(ProcPidFile, SubprocessExited) {
+ char buf[1];
+
+ // FIXME: Inconsistent behavior between kernels
+ // gVisor: Fails with ESRCH.
+ // 4.17: Succeeds and returns 1.
+ // EXPECT_THAT(ReadWhileExited("auxv", buf, sizeof(buf)),
+ // SyscallFailsWithErrno(ESRCH));
+
+ EXPECT_THAT(ReadWhileExited("cmdline", buf, sizeof(buf)),
+ SyscallFailsWithErrno(ESRCH));
+
+ if (!IsRunningOnGvisor()) {
+ // FIXME: Succeeds on gVisor.
+ EXPECT_THAT(ReadWhileExited("comm", buf, sizeof(buf)),
+ SyscallFailsWithErrno(ESRCH));
+ }
+
+ EXPECT_THAT(ReadWhileExited("gid_map", buf, sizeof(buf)),
+ SyscallSucceedsWithValue(sizeof(buf)));
+
+ if (!IsRunningOnGvisor()) {
+ // FIXME: Succeeds on gVisor.
+ EXPECT_THAT(ReadWhileExited("io", buf, sizeof(buf)),
+ SyscallFailsWithErrno(ESRCH));
+ }
+
+ if (!IsRunningOnGvisor()) {
+ // FIXME: Returns EOF on gVisor.
+ EXPECT_THAT(ReadWhileExited("maps", buf, sizeof(buf)),
+ SyscallFailsWithErrno(ESRCH));
+ }
+
+ if (!IsRunningOnGvisor()) {
+ // FIXME: Succeeds on gVisor.
+ EXPECT_THAT(ReadWhileExited("stat", buf, sizeof(buf)),
+ SyscallFailsWithErrno(ESRCH));
+ }
+
+ if (!IsRunningOnGvisor()) {
+ // FIXME: Succeeds on gVisor.
+ EXPECT_THAT(ReadWhileExited("status", buf, sizeof(buf)),
+ SyscallFailsWithErrno(ESRCH));
+ }
+
+ EXPECT_THAT(ReadWhileExited("uid_map", buf, sizeof(buf)),
+ SyscallSucceedsWithValue(sizeof(buf)));
+}
+
+PosixError DirContainsImpl(absl::string_view path,
+ const std::vector<std::string>& targets, bool strict) {
+ ASSIGN_OR_RETURN_ERRNO(auto listing, ListDir(path, false));
+ bool success = true;
+
+ for (auto& expected_entry : targets) {
+ auto cursor = std::find(listing.begin(), listing.end(), expected_entry);
+ if (cursor == listing.end()) {
+ success = false;
+ }
+ }
+
+ if (!success) {
+ return PosixError(
+ ENOENT,
+ absl::StrCat("Failed to find one or more paths in '", path, "'"));
+ }
+
+ if (strict) {
+ if (targets.size() != listing.size()) {
+ return PosixError(
+ EINVAL,
+ absl::StrCat("Expected to find ", targets.size(), " elements in '",
+ path, "', but found ", listing.size()));
+ }
+ }
+
+ return NoError();
+}
+
+PosixError DirContains(absl::string_view path,
+ const std::vector<std::string>& targets) {
+ return DirContainsImpl(path, targets, false);
+}
+
+PosixError DirContainsExactly(absl::string_view path,
+ const std::vector<std::string>& targets) {
+ return DirContainsImpl(path, targets, true);
+}
+
+PosixError EventuallyDirContainsExactly(absl::string_view path,
+ const std::vector<std::string>& targets) {
+ constexpr int kRetryCount = 100;
+ const absl::Duration kRetryDelay = absl::Milliseconds(100);
+
+ for (int i = 0; i < kRetryCount; ++i) {
+ auto res = DirContainsExactly(path, targets);
+ if (res.ok()) {
+ return res;
+ } else if (i < kRetryCount - 1) {
+ // Sleep if this isn't the final iteration.
+ absl::SleepFor(kRetryDelay);
+ }
+ }
+ return PosixError(ETIMEDOUT,
+ "Timed out while waiting for directory to contain files ");
+}
+
+TEST(ProcTask, Basic) {
+ EXPECT_NO_ERRNO(
+ DirContains("/proc/self/task", {".", "..", absl::StrCat(getpid())}));
+}
+
+std::vector<std::string> TaskFiles(const std::vector<std::string>& initial_contents,
+ const std::vector<pid_t>& pids) {
+ return VecCat<std::string>(
+ initial_contents,
+ ApplyVec<std::string>([](const pid_t p) { return absl::StrCat(p); }, pids));
+}
+
+std::vector<std::string> TaskFiles(const std::vector<pid_t>& pids) {
+ return TaskFiles({".", "..", absl::StrCat(getpid())}, pids);
+}
+
+// Helper class for creating a new task in the current thread group.
+class BlockingChild {
+ public:
+ BlockingChild() : thread_([=] { Start(); }) {}
+ ~BlockingChild() { Join(); }
+
+ pid_t Tid() const {
+ absl::MutexLock ml(&mu_);
+ mu_.Await(absl::Condition(&tid_ready_));
+ return tid_;
+ }
+
+ void Join() { Stop(); }
+
+ private:
+ void Start() {
+ absl::MutexLock ml(&mu_);
+ tid_ = syscall(__NR_gettid);
+ tid_ready_ = true;
+ mu_.Await(absl::Condition(&stop_));
+ }
+
+ void Stop() {
+ absl::MutexLock ml(&mu_);
+ stop_ = true;
+ }
+
+ mutable absl::Mutex mu_;
+ bool stop_ GUARDED_BY(mu_) = false;
+ pid_t tid_;
+ bool tid_ready_ GUARDED_BY(mu_) = false;
+
+ // Must be last to ensure that the destructor for the thread is run before
+ // any other member of the object is destroyed.
+ ScopedThread thread_;
+};
+
+TEST(ProcTask, NewThreadAppears) {
+ auto initial = ASSERT_NO_ERRNO_AND_VALUE(ListDir("/proc/self/task", false));
+ BlockingChild child1;
+ EXPECT_NO_ERRNO(DirContainsExactly("/proc/self/task",
+ TaskFiles(initial, {child1.Tid()})));
+}
+
+TEST(ProcTask, KilledThreadsDisappear) {
+ auto initial = ASSERT_NO_ERRNO_AND_VALUE(ListDir("/proc/self/task/", false));
+
+ BlockingChild child1;
+ EXPECT_NO_ERRNO(DirContainsExactly("/proc/self/task",
+ TaskFiles(initial, {child1.Tid()})));
+
+ // Stat child1's task file.
+ struct stat statbuf;
+ const std::string child1_task_file =
+ absl::StrCat("/proc/self/task/", child1.Tid());
+ EXPECT_THAT(stat(child1_task_file.c_str(), &statbuf), SyscallSucceeds());
+
+ BlockingChild child2;
+ EXPECT_NO_ERRNO(DirContainsExactly(
+ "/proc/self/task", TaskFiles(initial, {child1.Tid(), child2.Tid()})));
+
+ BlockingChild child3;
+ BlockingChild child4;
+ BlockingChild child5;
+ EXPECT_NO_ERRNO(DirContainsExactly(
+ "/proc/self/task",
+ TaskFiles(initial, {child1.Tid(), child2.Tid(), child3.Tid(),
+ child4.Tid(), child5.Tid()})));
+
+ child2.Join();
+ EXPECT_NO_ERRNO(EventuallyDirContainsExactly(
+ "/proc/self/task", TaskFiles(initial, {child1.Tid(), child3.Tid(),
+ child4.Tid(), child5.Tid()})));
+
+ child1.Join();
+ child4.Join();
+ EXPECT_NO_ERRNO(EventuallyDirContainsExactly(
+ "/proc/self/task", TaskFiles(initial, {child3.Tid(), child5.Tid()})));
+
+ // Stat child1's task file again. This time it should fail.
+ EXPECT_THAT(stat(child1_task_file.c_str(), &statbuf),
+ SyscallFailsWithErrno(ENOENT));
+
+ child3.Join();
+ child5.Join();
+ EXPECT_NO_ERRNO(EventuallyDirContainsExactly("/proc/self/task", initial));
+}
+
+TEST(ProcTask, ChildTaskDir) {
+ BlockingChild child1;
+ EXPECT_NO_ERRNO(DirContains("/proc/self/task", TaskFiles({child1.Tid()})));
+ EXPECT_NO_ERRNO(DirContains(absl::StrCat("/proc/", child1.Tid(), "/task"),
+ TaskFiles({child1.Tid()})));
+}
+
+PosixError VerifyPidDir(std::string path) {
+ return DirContains(path, {"exe", "fd", "io", "maps", "ns", "stat", "status"});
+}
+
+TEST(ProcTask, VerifyTaskDir) {
+ EXPECT_NO_ERRNO(VerifyPidDir("/proc/self"));
+
+ EXPECT_NO_ERRNO(VerifyPidDir(absl::StrCat("/proc/self/task/", getpid())));
+ BlockingChild child1;
+ EXPECT_NO_ERRNO(VerifyPidDir(absl::StrCat("/proc/self/task/", child1.Tid())));
+
+ // Only the first level of task directories should contain the 'task'
+ // directory. That is:
+ //
+ // /proc/1234/task <- should exist
+ // /proc/1234/task/1234/task <- should not exist
+ // /proc/1234/task/1235/task <- should not exist (where 1235 is in the same
+ // thread group as 1234).
+ EXPECT_FALSE(
+ DirContains(absl::StrCat("/proc/self/task/", getpid()), {"task"}).ok())
+ << "Found 'task' directory in an inner directory.";
+}
+
+TEST(ProcTask, TaskDirCannotBeDeleted) {
+ // Drop capabilities that allow us to override file and directory permissions.
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+
+ EXPECT_THAT(rmdir("/proc/self/task"), SyscallFails());
+ EXPECT_THAT(rmdir(absl::StrCat("/proc/self/task/", getpid()).c_str()),
+ SyscallFailsWithErrno(EACCES));
+}
+
+TEST(ProcTask, TaskDirHasCorrectMetadata) {
+ struct stat st;
+ EXPECT_THAT(stat("/proc/self/task", &st), SyscallSucceeds());
+ EXPECT_TRUE(S_ISDIR(st.st_mode));
+
+ // Verify file is readable and executable by everyone.
+ mode_t expected_permissions =
+ S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
+ mode_t permissions = st.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO);
+ EXPECT_EQ(expected_permissions, permissions);
+}
+
+TEST(ProcTask, TaskDirCanSeekToEnd) {
+ const FileDescriptor dirfd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/task", O_RDONLY));
+ EXPECT_THAT(lseek(dirfd.get(), 0, SEEK_END), SyscallSucceeds());
+}
+
+TEST(ProcTask, VerifyTaskDirNlinks) {
+ // A task directory will have 3 links if the taskgroup has a single
+ // thread. For example, the following shows where the links to
+ // '/proc/12345/task comes' from for a single threaded process with pid 12345:
+ //
+ // /proc/12345/task <-- 1 link for the directory itself
+ // . <-- link from "."
+ // ..
+ // 12345
+ // .
+ // .. <-- link from ".." to parent.
+ // <other contents of a task dir>
+ //
+ // We can't assert an absolute number of links since we don't control how many
+ // threads the test framework spawns. Instead, we'll ensure creating a new
+ // thread increases the number of links as expected.
+
+ // Once we reach the test body, we can count on the thread count being stable
+ // unless we spawn a new one.
+ uint64_t initial_links = ASSERT_NO_ERRNO_AND_VALUE(Links("/proc/self/task"));
+ ASSERT_GE(initial_links, 3);
+
+ // For each new subtask, we should gain a new link.
+ BlockingChild child1;
+ EXPECT_THAT(Links("/proc/self/task"),
+ IsPosixErrorOkAndHolds(initial_links + 1));
+ BlockingChild child2;
+ EXPECT_THAT(Links("/proc/self/task"),
+ IsPosixErrorOkAndHolds(initial_links + 2));
+}
+
+TEST(ProcTask, CommContainsThreadNameAndTrailingNewline) {
+ constexpr char kThreadName[] = "TestThread12345";
+ ASSERT_THAT(prctl(PR_SET_NAME, kThreadName), SyscallSucceeds());
+
+ auto thread_name = ASSERT_NO_ERRNO_AND_VALUE(
+ GetContents(JoinPath("/proc", absl::StrCat(getpid()), "task",
+ absl::StrCat(syscall(SYS_gettid)), "comm")));
+ EXPECT_EQ(absl::StrCat(kThreadName, "\n"), thread_name);
+}
+
+TEST(ProcTaskNs, NsDirExistsAndHasCorrectMetadata) {
+ EXPECT_NO_ERRNO(DirContains("/proc/self/ns", {"net", "pid", "user"}));
+
+ // Let's just test the 'pid' entry, all of them are very similar.
+ struct stat st;
+ EXPECT_THAT(lstat("/proc/self/ns/pid", &st), SyscallSucceeds());
+ EXPECT_TRUE(S_ISLNK(st.st_mode));
+
+ auto link = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self/ns/pid"));
+ EXPECT_THAT(link, ::testing::StartsWith("pid:["));
+}
+
+TEST(ProcTaskNs, AccessOnNsNodeSucceeds) {
+ EXPECT_THAT(access("/proc/self/ns/pid", F_OK), SyscallSucceeds());
+}
+
+TEST(ProcSysKernelHostname, Exists) {
+ EXPECT_THAT(open("/proc/sys/kernel/hostname", O_RDONLY), SyscallSucceeds());
+}
+
+TEST(ProcSysKernelHostname, MatchesUname) {
+ struct utsname buf;
+ EXPECT_THAT(uname(&buf), SyscallSucceeds());
+ const std::string hostname = absl::StrCat(buf.nodename, "\n");
+ auto procfs_hostname =
+ ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/kernel/hostname"));
+ EXPECT_EQ(procfs_hostname, hostname);
+}
+
+TEST(ProcSysVmMmapMinAddr, HasNumericValue) {
+ const std::string mmap_min_addr_str =
+ ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/vm/mmap_min_addr"));
+ uintptr_t mmap_min_addr;
+ EXPECT_TRUE(absl::SimpleAtoi(mmap_min_addr_str, &mmap_min_addr))
+ << "/proc/sys/vm/mmap_min_addr does not contain a numeric value: "
+ << mmap_min_addr_str;
+}
+
+TEST(ProcSysVmOvercommitMemory, HasNumericValue) {
+ const std::string overcommit_memory_str =
+ ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/vm/overcommit_memory"));
+ uintptr_t overcommit_memory;
+ EXPECT_TRUE(absl::SimpleAtoi(overcommit_memory_str, &overcommit_memory))
+ << "/proc/sys/vm/overcommit_memory does not contain a numeric value: "
+ << overcommit_memory;
+}
+
+// Check that link for proc fd entries point the target node, not the
+// symlink itself.
+TEST(ProcTaskFd, FstatatFollowsSymlink) {
+ const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY));
+
+ struct stat sproc = {};
+ EXPECT_THAT(
+ fstatat(-1, absl::StrCat("/proc/self/fd/", fd.get()).c_str(), &sproc, 0),
+ SyscallSucceeds());
+
+ struct stat sfile = {};
+ EXPECT_THAT(fstatat(-1, file.path().c_str(), &sfile, 0), SyscallSucceeds());
+
+ // If fstatat follows the fd symlink, the device and inode numbers should
+ // match at a minimum.
+ EXPECT_EQ(sproc.st_dev, sfile.st_dev);
+ EXPECT_EQ(sproc.st_ino, sfile.st_ino);
+ EXPECT_EQ(0, memcmp(&sfile, &sproc, sizeof(sfile)));
+}
+
+TEST(ProcFilesystems, Bug65172365) {
+ std::string proc_filesystems =
+ ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/filesystems"));
+ ASSERT_FALSE(proc_filesystems.empty());
+}
+
+TEST(ProcFilesystems, PresenceOfShmMaxMniAll) {
+ uint64_t shmmax = 0;
+ uint64_t shmall = 0;
+ uint64_t shmmni = 0;
+ std::string proc_file;
+ proc_file = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/kernel/shmmax"));
+ ASSERT_FALSE(proc_file.empty());
+ ASSERT_TRUE(absl::SimpleAtoi(proc_file, &shmmax));
+ proc_file = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/kernel/shmall"));
+ ASSERT_FALSE(proc_file.empty());
+ ASSERT_TRUE(absl::SimpleAtoi(proc_file, &shmall));
+ proc_file = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/kernel/shmmni"));
+ ASSERT_FALSE(proc_file.empty());
+ ASSERT_TRUE(absl::SimpleAtoi(proc_file, &shmmni));
+
+ ASSERT_GT(shmmax, 0);
+ ASSERT_GT(shmall, 0);
+ ASSERT_GT(shmmni, 0);
+ ASSERT_LE(shmall, shmmax);
+
+ // These values should never be higher than this by default, for more
+ // information see uapi/linux/shm.h
+ ASSERT_LE(shmmax, ULONG_MAX - (1UL << 24));
+ ASSERT_LE(shmall, ULONG_MAX - (1UL << 24));
+}
+
+// Check that /proc/mounts is a symlink to self/mounts.
+TEST(ProcMounts, IsSymlink) {
+ auto link = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/mounts"));
+ EXPECT_EQ(link, "self/mounts");
+}
+
+// Check that /proc/self/mounts looks something like a real mounts file.
+TEST(ProcSelfMounts, RequiredFieldsArePresent) {
+ auto mounts = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/mounts"));
+ EXPECT_THAT(mounts,
+ AllOf(
+ // Root mount.
+ ContainsRegex(R"(\S+ / \S+ (rw|ro)\S* [0-9]+ [0-9]+\s)"),
+ // Root mount.
+ ContainsRegex(R"(\S+ /proc \S+ rw\S* [0-9]+ [0-9]+\s)")));
+}
+} // namespace
+} // namespace testing
+} // namespace gvisor
+
+int main(int argc, char** argv) {
+ for (int i = 0; i < argc; ++i) {
+ gvisor::testing::saved_argv.emplace_back(std::string(argv[i]));
+ }
+
+ gvisor::testing::TestInit(&argc, &argv);
+ return RUN_ALL_TESTS();
+}
diff --git a/test/syscalls/linux/proc_net.cc b/test/syscalls/linux/proc_net.cc
new file mode 100644
index 000000000..6060d0644
--- /dev/null
+++ b/test/syscalls/linux/proc_net.cc
@@ -0,0 +1,59 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+TEST(ProcNetIfInet6, Format) {
+ auto ifinet6 = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/if_inet6"));
+ EXPECT_THAT(ifinet6,
+ ::testing::MatchesRegex(
+ // Ex: "00000000000000000000000000000001 01 80 10 80 lo\n"
+ "^([a-f\\d]{32}( [a-f\\d]{2}){4} +[a-z][a-z\\d]*\\n)+$"));
+}
+
+TEST(ProcSysNetIpv4Sack, Exists) {
+ EXPECT_THAT(open("/proc/sys/net/ipv4/tcp_sack", O_RDONLY), SyscallSucceeds());
+}
+
+TEST(ProcSysNetIpv4Sack, CanReadAndWrite) {
+ auto const fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/sys/net/ipv4/tcp_sack", O_RDWR));
+
+ char buf;
+ EXPECT_THAT(PreadFd(fd.get(), &buf, sizeof(buf), 0),
+ SyscallSucceedsWithValue(sizeof(buf)));
+
+ EXPECT_TRUE(buf == '0' || buf == '1') << "unexpected tcp_sack: " << buf;
+
+ char to_write = (buf == '1') ? '0' : '1';
+ EXPECT_THAT(PwriteFd(fd.get(), &to_write, sizeof(to_write), 0),
+ SyscallSucceedsWithValue(sizeof(to_write)));
+
+ buf = 0;
+ EXPECT_THAT(PreadFd(fd.get(), &buf, sizeof(buf), 0),
+ SyscallSucceedsWithValue(sizeof(buf)));
+ EXPECT_EQ(buf, to_write);
+}
+
+} // namespace
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/pselect.cc b/test/syscalls/linux/pselect.cc
new file mode 100644
index 000000000..3294f6c14
--- /dev/null
+++ b/test/syscalls/linux/pselect.cc
@@ -0,0 +1,190 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <signal.h>
+#include <sys/select.h>
+
+#include "gtest/gtest.h"
+#include "absl/time/time.h"
+#include "test/syscalls/linux/base_poll_test.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+struct MaskWithSize {
+ sigset_t* mask;
+ size_t mask_size;
+};
+
+// Linux and glibc have a different idea of the sizeof sigset_t. When calling
+// the syscall directly, use what the kernel expects.
+unsigned kSigsetSize = SIGRTMAX / 8;
+
+// Linux pselect(2) differs from the glibc wrapper function in that Linux
+// updates the timeout with the amount of time remaining. In order to test this
+// behavior we need to use the syscall directly.
+int syscallPselect6(int nfds, fd_set* readfds, fd_set* writefds,
+ fd_set* exceptfds, struct timespec* timeout,
+ const MaskWithSize* mask_with_size) {
+ return syscall(SYS_pselect6, nfds, readfds, writefds, exceptfds, timeout,
+ mask_with_size);
+}
+
+class PselectTest : public BasePollTest {
+ protected:
+ void SetUp() override { BasePollTest::SetUp(); }
+ void TearDown() override { BasePollTest::TearDown(); }
+};
+
+// See that when there are no FD sets, pselect behaves like sleep.
+TEST_F(PselectTest, NullFds) {
+ struct timespec timeout = absl::ToTimespec(absl::Milliseconds(10));
+ ASSERT_THAT(syscallPselect6(0, nullptr, nullptr, nullptr, &timeout, nullptr),
+ SyscallSucceeds());
+ EXPECT_EQ(timeout.tv_sec, 0);
+ EXPECT_EQ(timeout.tv_nsec, 0);
+
+ timeout = absl::ToTimespec(absl::Milliseconds(10));
+ ASSERT_THAT(syscallPselect6(1, nullptr, nullptr, nullptr, &timeout, nullptr),
+ SyscallSucceeds());
+ EXPECT_EQ(timeout.tv_sec, 0);
+ EXPECT_EQ(timeout.tv_nsec, 0);
+}
+
+TEST_F(PselectTest, ClosedFds) {
+ fd_set read_set;
+ FD_ZERO(&read_set);
+ int fd;
+ ASSERT_THAT(fd = dup(1), SyscallSucceeds());
+ ASSERT_THAT(close(fd), SyscallSucceeds());
+ FD_SET(fd, &read_set);
+ struct timespec timeout = absl::ToTimespec(absl::Milliseconds(10));
+ EXPECT_THAT(
+ syscallPselect6(fd + 1, &read_set, nullptr, nullptr, &timeout, nullptr),
+ SyscallFailsWithErrno(EBADF));
+}
+
+TEST_F(PselectTest, ZeroTimeout) {
+ struct timespec timeout = {};
+ ASSERT_THAT(syscallPselect6(1, nullptr, nullptr, nullptr, &timeout, nullptr),
+ SyscallSucceeds());
+ EXPECT_EQ(timeout.tv_sec, 0);
+ EXPECT_EQ(timeout.tv_nsec, 0);
+}
+
+// If random S/R interrupts the pselect, SIGALRM may be delivered before pselect
+// restarts, causing the pselect to hang forever.
+TEST_F(PselectTest, NoTimeout_NoRandomSave) {
+ // When there's no timeout, pselect may never return so set a timer.
+ SetTimer(absl::Milliseconds(100));
+ // See that we get interrupted by the timer.
+ ASSERT_THAT(syscallPselect6(1, nullptr, nullptr, nullptr, nullptr, nullptr),
+ SyscallFailsWithErrno(EINTR));
+ EXPECT_TRUE(TimerFired());
+}
+
+TEST_F(PselectTest, InvalidTimeoutNegative) {
+ struct timespec timeout = absl::ToTimespec(absl::Seconds(-1));
+ ASSERT_THAT(syscallPselect6(1, nullptr, nullptr, nullptr, &timeout, nullptr),
+ SyscallFailsWithErrno(EINVAL));
+ EXPECT_EQ(timeout.tv_sec, -1);
+ EXPECT_EQ(timeout.tv_nsec, 0);
+}
+
+TEST_F(PselectTest, InvalidTimeoutNotNormalized) {
+ struct timespec timeout = {0, 1000000001};
+ ASSERT_THAT(syscallPselect6(1, nullptr, nullptr, nullptr, &timeout, nullptr),
+ SyscallFailsWithErrno(EINVAL));
+ EXPECT_EQ(timeout.tv_sec, 0);
+ EXPECT_EQ(timeout.tv_nsec, 1000000001);
+}
+
+TEST_F(PselectTest, EmptySigMaskInvalidMaskSize) {
+ struct timespec timeout = {};
+ MaskWithSize invalid = {nullptr, 7};
+ EXPECT_THAT(syscallPselect6(0, nullptr, nullptr, nullptr, &timeout, &invalid),
+ SyscallSucceeds());
+}
+
+TEST_F(PselectTest, EmptySigMaskValidMaskSize) {
+ struct timespec timeout = {};
+ MaskWithSize invalid = {nullptr, 8};
+ EXPECT_THAT(syscallPselect6(0, nullptr, nullptr, nullptr, &timeout, &invalid),
+ SyscallSucceeds());
+}
+
+TEST_F(PselectTest, InvalidMaskSize) {
+ struct timespec timeout = {};
+ sigset_t sigmask;
+ ASSERT_THAT(sigemptyset(&sigmask), SyscallSucceeds());
+ MaskWithSize invalid = {&sigmask, 7};
+ EXPECT_THAT(syscallPselect6(1, nullptr, nullptr, nullptr, &timeout, &invalid),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+// Verify that signals blocked by the pselect mask (that would otherwise be
+// allowed) do not interrupt pselect.
+TEST_F(PselectTest, SignalMaskBlocksSignal) {
+ absl::Duration duration(absl::Seconds(30));
+ struct timespec timeout = absl::ToTimespec(duration);
+ absl::Duration timer_duration(absl::Seconds(10));
+
+ // Call with a mask that blocks SIGALRM. See that pselect is not interrupted
+ // (i.e. returns 0) and that upon completion, the timer has fired.
+ sigset_t mask;
+ ASSERT_THAT(sigprocmask(0, nullptr, &mask), SyscallSucceeds());
+ ASSERT_THAT(sigaddset(&mask, SIGALRM), SyscallSucceeds());
+ MaskWithSize mask_with_size = {&mask, kSigsetSize};
+ SetTimer(timer_duration);
+ MaybeSave();
+ ASSERT_FALSE(TimerFired());
+ ASSERT_THAT(
+ syscallPselect6(1, nullptr, nullptr, nullptr, &timeout, &mask_with_size),
+ SyscallSucceeds());
+ EXPECT_TRUE(TimerFired());
+ EXPECT_EQ(absl::DurationFromTimespec(timeout), absl::Duration());
+}
+
+// Verify that signals allowed by the pselect mask (that would otherwise be
+// blocked) interrupt pselect.
+TEST_F(PselectTest, SignalMaskAllowsSignal) {
+ absl::Duration duration = absl::Seconds(30);
+ struct timespec timeout = absl::ToTimespec(duration);
+ absl::Duration timer_duration = absl::Seconds(10);
+
+ sigset_t mask;
+ ASSERT_THAT(sigprocmask(0, nullptr, &mask), SyscallSucceeds());
+
+ // Block SIGALRM.
+ auto cleanup =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, SIGALRM));
+
+ // Call with a mask that unblocks SIGALRM. See that pselect is interrupted.
+ MaskWithSize mask_with_size = {&mask, kSigsetSize};
+ SetTimer(timer_duration);
+ MaybeSave();
+ ASSERT_FALSE(TimerFired());
+ ASSERT_THAT(
+ syscallPselect6(1, nullptr, nullptr, nullptr, &timeout, &mask_with_size),
+ SyscallFailsWithErrno(EINTR));
+ EXPECT_TRUE(TimerFired());
+ EXPECT_GT(absl::DurationFromTimespec(timeout), absl::Duration());
+}
+
+} // namespace
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/ptrace.cc b/test/syscalls/linux/ptrace.cc
new file mode 100644
index 000000000..d3b3b8b02
--- /dev/null
+++ b/test/syscalls/linux/ptrace.cc
@@ -0,0 +1,948 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <elf.h>
+#include <signal.h>
+#include <stddef.h>
+#include <sys/ptrace.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/user.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include <utility>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/logging.h"
+#include "test/util/multiprocess_util.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Sends sig to the current process with tgkill(2).
+//
+// glibc's raise(2) may change the signal mask before sending the signal. These
+// extra syscalls make tests of syscall, signal interception, etc. difficult to
+// write.
+void RaiseSignal(int sig) {
+ pid_t pid = getpid();
+ TEST_PCHECK(pid > 0);
+ pid_t tid = gettid();
+ TEST_PCHECK(tid > 0);
+ TEST_PCHECK(tgkill(pid, tid, sig) == 0);
+}
+
+// Returns the Yama ptrace scope.
+PosixErrorOr<int> YamaPtraceScope() {
+ constexpr char kYamaPtraceScopePath[] = "/proc/sys/kernel/yama/ptrace_scope";
+
+ ASSIGN_OR_RETURN_ERRNO(bool exists, Exists(kYamaPtraceScopePath));
+ if (!exists) {
+ // File doesn't exist means no Yama, so the scope is disabled -> 0.
+ return 0;
+ }
+
+ std::string contents;
+ RETURN_IF_ERRNO(GetContents(kYamaPtraceScopePath, &contents));
+
+ int scope;
+ if (!absl::SimpleAtoi(contents, &scope)) {
+ return PosixError(EINVAL, absl::StrCat(contents, ": not a valid number"));
+ }
+
+ return scope;
+}
+
+TEST(PtraceTest, AttachSelf) {
+ EXPECT_THAT(ptrace(PTRACE_ATTACH, gettid(), 0, 0),
+ SyscallFailsWithErrno(EPERM));
+}
+
+TEST(PtraceTest, AttachSameThreadGroup) {
+ pid_t const tid = gettid();
+ ScopedThread([&] {
+ EXPECT_THAT(ptrace(PTRACE_ATTACH, tid, 0, 0), SyscallFailsWithErrno(EPERM));
+ });
+}
+
+TEST(PtraceTest, AttachParent_PeekData_PokeData_SignalSuppression) {
+ // Yama prevents attaching to a parent. Skip the test if the scope is anything
+ // except disabled.
+ SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) > 0);
+
+ constexpr long kBeforePokeDataValue = 10;
+ constexpr long kAfterPokeDataValue = 20;
+
+ volatile long word = kBeforePokeDataValue;
+
+ pid_t const child_pid = fork();
+ if (child_pid == 0) {
+ // In child process.
+
+ // Attach to the parent.
+ pid_t const parent_pid = getppid();
+ TEST_PCHECK(ptrace(PTRACE_ATTACH, parent_pid, 0, 0) == 0);
+ MaybeSave();
+
+ // Block until the parent enters signal-delivery-stop as a result of the
+ // SIGSTOP sent by PTRACE_ATTACH.
+ int status;
+ TEST_PCHECK(waitpid(parent_pid, &status, 0) == parent_pid);
+ MaybeSave();
+ TEST_CHECK(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
+
+ // Replace the value of word in the parent process with kAfterPokeDataValue.
+ long const parent_word = ptrace(PTRACE_PEEKDATA, parent_pid, &word, 0);
+ MaybeSave();
+ TEST_CHECK(parent_word == kBeforePokeDataValue);
+ TEST_PCHECK(
+ ptrace(PTRACE_POKEDATA, parent_pid, &word, kAfterPokeDataValue) == 0);
+ MaybeSave();
+
+ // Detach from the parent and suppress the SIGSTOP. If the SIGSTOP is not
+ // suppressed, the parent will hang in group-stop, causing the test to time
+ // out.
+ TEST_PCHECK(ptrace(PTRACE_DETACH, parent_pid, 0, 0) == 0);
+ MaybeSave();
+ _exit(0);
+ }
+ // In parent process.
+ ASSERT_THAT(child_pid, SyscallSucceeds());
+
+ // Wait for the child to complete.
+ int status;
+ ASSERT_THAT(waitpid(child_pid, &status, 0),
+ SyscallSucceedsWithValue(child_pid));
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << " status " << status;
+
+ // Check that the child's PTRACE_POKEDATA was effective.
+ EXPECT_EQ(kAfterPokeDataValue, word);
+}
+
+TEST(PtraceTest, GetSigMask) {
+ // <sys/user.h> doesn't define these until Linux 4.4, even though the features
+ // were added in 3.11.
+ constexpr auto kPtraceGetSigMask = static_cast<enum __ptrace_request>(0x420a);
+ constexpr auto kPtraceSetSigMask = static_cast<enum __ptrace_request>(0x420b);
+ // glibc and the Linux kernel define a sigset_t with different sizes. To avoid
+ // creating a kernel_sigset_t and recreating all the modification functions
+ // (sigemptyset, etc), we just hardcode the kernel sigset size.
+ constexpr int kSizeofKernelSigset = 8;
+ constexpr int kBlockSignal = SIGUSR1;
+ sigset_t blocked;
+ sigemptyset(&blocked);
+ sigaddset(&blocked, kBlockSignal);
+
+ pid_t const child_pid = fork();
+ if (child_pid == 0) {
+ // In child process.
+
+ // Install a signal handler for kBlockSignal to avoid termination and block
+ // it.
+ TEST_PCHECK(signal(kBlockSignal, +[](int signo) {}) != SIG_ERR);
+ MaybeSave();
+ TEST_PCHECK(sigprocmask(SIG_SETMASK, &blocked, nullptr) == 0);
+ MaybeSave();
+
+ // Enable tracing.
+ TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0);
+ MaybeSave();
+
+ // This should be blocked.
+ RaiseSignal(kBlockSignal);
+
+ // This should be suppressed by parent, who will change signal mask in the
+ // meantime, which means kBlockSignal should be delivered once this resumes.
+ RaiseSignal(SIGSTOP);
+
+ _exit(0);
+ }
+ // In parent process.
+ ASSERT_THAT(child_pid, SyscallSucceeds());
+
+ // Wait for the child to send itself SIGSTOP and enter signal-delivery-stop.
+ int status;
+ ASSERT_THAT(waitpid(child_pid, &status, 0),
+ SyscallSucceedsWithValue(child_pid));
+ EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP)
+ << " status " << status;
+
+ // Get current signal mask.
+ sigset_t set;
+ EXPECT_THAT(ptrace(kPtraceGetSigMask, child_pid, kSizeofKernelSigset, &set),
+ SyscallSucceeds());
+ EXPECT_THAT(blocked, EqualsSigset(set));
+
+ // Try to get current signal mask with bad size argument.
+ EXPECT_THAT(ptrace(kPtraceGetSigMask, child_pid, 0, nullptr),
+ SyscallFailsWithErrno(EINVAL));
+
+ // Try to set bad signal mask.
+ sigset_t* bad_addr = reinterpret_cast<sigset_t*>(-1);
+ EXPECT_THAT(
+ ptrace(kPtraceSetSigMask, child_pid, kSizeofKernelSigset, bad_addr),
+ SyscallFailsWithErrno(EFAULT));
+
+ // Set signal mask to empty set.
+ sigset_t set1;
+ sigemptyset(&set1);
+ EXPECT_THAT(ptrace(kPtraceSetSigMask, child_pid, kSizeofKernelSigset, &set1),
+ SyscallSucceeds());
+
+ // Suppress SIGSTOP and resume the child. It should re-enter
+ // signal-delivery-stop for kBlockSignal.
+ ASSERT_THAT(ptrace(PTRACE_CONT, child_pid, 0, 0), SyscallSucceeds());
+ ASSERT_THAT(waitpid(child_pid, &status, 0),
+ SyscallSucceedsWithValue(child_pid));
+ EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == kBlockSignal)
+ << " status " << status;
+
+ ASSERT_THAT(ptrace(PTRACE_CONT, child_pid, 0, 0), SyscallSucceeds());
+ ASSERT_THAT(waitpid(child_pid, &status, 0),
+ SyscallSucceedsWithValue(child_pid));
+ // Let's see that process exited normally.
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << " status " << status;
+}
+
+TEST(PtraceTest, GetSiginfo_SetSiginfo_SignalInjection) {
+ constexpr int kOriginalSigno = SIGUSR1;
+ constexpr int kInjectedSigno = SIGUSR2;
+
+ pid_t const child_pid = fork();
+ if (child_pid == 0) {
+ // In child process.
+
+ // Override all signal handlers.
+ struct sigaction sa = {};
+ sa.sa_handler = +[](int signo) { _exit(signo); };
+ TEST_PCHECK(sigfillset(&sa.sa_mask) == 0);
+ for (int signo = 1; signo < 32; signo++) {
+ if (signo == SIGKILL || signo == SIGSTOP) {
+ continue;
+ }
+ TEST_PCHECK(sigaction(signo, &sa, nullptr) == 0);
+ }
+ for (int signo = SIGRTMIN; signo <= SIGRTMAX; signo++) {
+ TEST_PCHECK(sigaction(signo, &sa, nullptr) == 0);
+ }
+
+ // Unblock all signals.
+ TEST_PCHECK(sigprocmask(SIG_UNBLOCK, &sa.sa_mask, nullptr) == 0);
+ MaybeSave();
+
+ // Send ourselves kOriginalSignal while ptraced and exit with the signal we
+ // actually receive via the signal handler, if any, or 0 if we don't receive
+ // a signal.
+ TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0);
+ MaybeSave();
+ RaiseSignal(kOriginalSigno);
+ _exit(0);
+ }
+ // In parent process.
+ ASSERT_THAT(child_pid, SyscallSucceeds());
+
+ // Wait for the child to send itself kOriginalSigno and enter
+ // signal-delivery-stop.
+ int status;
+ ASSERT_THAT(waitpid(child_pid, &status, 0),
+ SyscallSucceedsWithValue(child_pid));
+ EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == kOriginalSigno)
+ << " status " << status;
+
+ siginfo_t siginfo = {};
+ ASSERT_THAT(ptrace(PTRACE_GETSIGINFO, child_pid, 0, &siginfo),
+ SyscallSucceeds());
+ EXPECT_EQ(kOriginalSigno, siginfo.si_signo);
+ EXPECT_EQ(SI_TKILL, siginfo.si_code);
+
+ // Replace the signal with kInjectedSigno, and check that the child exits
+ // with kInjectedSigno, indicating that signal injection was successful.
+ siginfo.si_signo = kInjectedSigno;
+ ASSERT_THAT(ptrace(PTRACE_SETSIGINFO, child_pid, 0, &siginfo),
+ SyscallSucceeds());
+ ASSERT_THAT(ptrace(PTRACE_DETACH, child_pid, 0, kInjectedSigno),
+ SyscallSucceeds());
+ ASSERT_THAT(waitpid(child_pid, &status, 0),
+ SyscallSucceedsWithValue(child_pid));
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == kInjectedSigno)
+ << " status " << status;
+}
+
+TEST(PtraceTest, SIGKILLDoesNotCauseSignalDeliveryStop) {
+ pid_t const child_pid = fork();
+ if (child_pid == 0) {
+ // In child process.
+ TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0);
+ MaybeSave();
+ RaiseSignal(SIGKILL);
+ TEST_CHECK_MSG(false, "Survived SIGKILL?");
+ _exit(1);
+ }
+ // In parent process.
+ ASSERT_THAT(child_pid, SyscallSucceeds());
+
+ // Expect the child to die to SIGKILL without entering signal-delivery-stop.
+ int status;
+ ASSERT_THAT(waitpid(child_pid, &status, 0),
+ SyscallSucceedsWithValue(child_pid));
+ EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
+ << " status " << status;
+}
+
+TEST(PtraceTest, PtraceKill) {
+ constexpr int kOriginalSigno = SIGUSR1;
+
+ pid_t const child_pid = fork();
+ if (child_pid == 0) {
+ // In child process.
+ TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0);
+ MaybeSave();
+
+ // PTRACE_KILL only works if tracee has entered signal-delivery-stop.
+ RaiseSignal(kOriginalSigno);
+ TEST_CHECK_MSG(false, "Failed to kill the process?");
+ _exit(0);
+ }
+ // In parent process.
+ ASSERT_THAT(child_pid, SyscallSucceeds());
+
+ // Wait for the child to send itself kOriginalSigno and enter
+ // signal-delivery-stop.
+ int status;
+ ASSERT_THAT(waitpid(child_pid, &status, 0),
+ SyscallSucceedsWithValue(child_pid));
+ EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == kOriginalSigno)
+ << " status " << status;
+
+ ASSERT_THAT(ptrace(PTRACE_KILL, child_pid, 0, 0), SyscallSucceeds());
+
+ // Expect the child to die with SIGKILL.
+ ASSERT_THAT(waitpid(child_pid, &status, 0),
+ SyscallSucceedsWithValue(child_pid));
+ EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
+ << " status " << status;
+}
+
+TEST(PtraceTest, GetRegSet) {
+ pid_t const child_pid = fork();
+ if (child_pid == 0) {
+ // In child process.
+
+ // Enable tracing.
+ TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0);
+ MaybeSave();
+
+ // Use kill explicitly because we check the syscall argument register below.
+ kill(getpid(), SIGSTOP);
+
+ _exit(0);
+ }
+ // In parent process.
+ ASSERT_THAT(child_pid, SyscallSucceeds());
+
+ // Wait for the child to send itself SIGSTOP and enter signal-delivery-stop.
+ int status;
+ ASSERT_THAT(waitpid(child_pid, &status, 0),
+ SyscallSucceedsWithValue(child_pid));
+ EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP)
+ << " status " << status;
+
+ // Get the general registers.
+ struct user_regs_struct regs;
+ struct iovec iov;
+ iov.iov_base = &regs;
+ iov.iov_len = sizeof(regs);
+ EXPECT_THAT(ptrace(PTRACE_GETREGSET, child_pid, NT_PRSTATUS, &iov),
+ SyscallSucceeds());
+
+ // Read exactly the full register set.
+ EXPECT_EQ(iov.iov_len, sizeof(regs));
+
+#ifdef __x86_64__
+ // Child called kill(2), with SIGSTOP as arg 2.
+ EXPECT_EQ(regs.rsi, SIGSTOP);
+#endif
+
+ // Suppress SIGSTOP and resume the child.
+ ASSERT_THAT(ptrace(PTRACE_CONT, child_pid, 0, 0), SyscallSucceeds());
+ ASSERT_THAT(waitpid(child_pid, &status, 0),
+ SyscallSucceedsWithValue(child_pid));
+ // Let's see that process exited normally.
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << " status " << status;
+}
+
+TEST(PtraceTest, AttachingConvertsGroupStopToPtraceStop) {
+ pid_t const child_pid = fork();
+ if (child_pid == 0) {
+ // In child process.
+ while (true) {
+ pause();
+ }
+ }
+ // In parent process.
+ ASSERT_THAT(child_pid, SyscallSucceeds());
+
+ // SIGSTOP the child and wait for it to stop.
+ ASSERT_THAT(kill(child_pid, SIGSTOP), SyscallSucceeds());
+ int status;
+ ASSERT_THAT(waitpid(child_pid, &status, WUNTRACED),
+ SyscallSucceedsWithValue(child_pid));
+ EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP)
+ << " status " << status;
+
+ // Attach to the child and expect it to re-enter a traced group-stop despite
+ // already being stopped.
+ ASSERT_THAT(ptrace(PTRACE_ATTACH, child_pid, 0, 0), SyscallSucceeds());
+ ASSERT_THAT(waitpid(child_pid, &status, 0),
+ SyscallSucceedsWithValue(child_pid));
+ EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP)
+ << " status " << status;
+
+ // Verify that the child is ptrace-stopped by checking that it can receive
+ // ptrace commands requiring a ptrace-stop.
+ EXPECT_THAT(ptrace(PTRACE_SETOPTIONS, child_pid, 0, 0), SyscallSucceeds());
+
+ // Group-stop is distinguished from signal-delivery-stop by PTRACE_GETSIGINFO
+ // failing with EINVAL.
+ siginfo_t siginfo = {};
+ EXPECT_THAT(ptrace(PTRACE_GETSIGINFO, child_pid, 0, &siginfo),
+ SyscallFailsWithErrno(EINVAL));
+
+ // Detach from the child and expect it to stay stopped without a notification.
+ ASSERT_THAT(ptrace(PTRACE_DETACH, child_pid, 0, 0), SyscallSucceeds());
+ ASSERT_THAT(waitpid(child_pid, &status, WUNTRACED | WNOHANG),
+ SyscallSucceedsWithValue(0));
+
+ // Sending it SIGCONT should cause it to leave its stop.
+ ASSERT_THAT(kill(child_pid, SIGCONT), SyscallSucceeds());
+ ASSERT_THAT(waitpid(child_pid, &status, WCONTINUED),
+ SyscallSucceedsWithValue(child_pid));
+ EXPECT_TRUE(WIFCONTINUED(status)) << " status " << status;
+
+ // Clean up the child.
+ ASSERT_THAT(kill(child_pid, SIGKILL), SyscallSucceeds());
+ ASSERT_THAT(waitpid(child_pid, &status, 0),
+ SyscallSucceedsWithValue(child_pid));
+ EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
+ << " status " << status;
+}
+
+// Fixture for tests parameterized by whether or not to use PTRACE_O_TRACEEXEC.
+class PtraceExecveTest : public ::testing::TestWithParam<bool> {
+ protected:
+ bool TraceExec() const { return GetParam(); }
+};
+
+TEST_P(PtraceExecveTest, Execve_GetRegs_PeekUser_SIGKILL_TraceClone_TraceExit) {
+ pid_t const child_pid = fork();
+ if (child_pid == 0) {
+ // In child process.
+
+ // Enable tracing, then raise SIGSTOP and expect our parent to suppress it.
+ TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0);
+ MaybeSave();
+ RaiseSignal(SIGSTOP);
+ MaybeSave();
+
+ // Call execve in a non-leader thread.
+ ExecveArray const owned_child_argv = {"/proc/self/exe"};
+ char* const* const child_argv = owned_child_argv.get();
+ ScopedThread t([&] {
+ execve(child_argv[0], child_argv, /* envp = */ nullptr);
+ TEST_CHECK_MSG(false, "Survived execve? (thread)");
+ });
+ t.Join();
+ TEST_CHECK_MSG(false, "Survived execve? (main)");
+ _exit(1);
+ }
+ // In parent process.
+ ASSERT_THAT(child_pid, SyscallSucceeds());
+
+ // Wait for the child to send itself SIGSTOP and enter signal-delivery-stop.
+ int status;
+ ASSERT_THAT(waitpid(child_pid, &status, 0),
+ SyscallSucceedsWithValue(child_pid));
+ EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP)
+ << " status " << status;
+
+ // Enable PTRACE_O_TRACECLONE so we can get the ID of the child's non-leader
+ // thread, PTRACE_O_TRACEEXIT so we can observe the leader's death, and
+ // PTRACE_O_TRACEEXEC if required by the test. (The leader doesn't call
+ // execve, but options should be inherited across clone.)
+ long opts = PTRACE_O_TRACECLONE | PTRACE_O_TRACEEXIT;
+ if (TraceExec()) {
+ opts |= PTRACE_O_TRACEEXEC;
+ }
+ ASSERT_THAT(ptrace(PTRACE_SETOPTIONS, child_pid, 0, opts), SyscallSucceeds());
+
+ // Suppress the SIGSTOP and wait for the child's leader thread to report
+ // PTRACE_EVENT_CLONE. Get the new thread's ID from the event.
+ ASSERT_THAT(ptrace(PTRACE_CONT, child_pid, 0, 0), SyscallSucceeds());
+ ASSERT_THAT(waitpid(child_pid, &status, 0),
+ SyscallSucceedsWithValue(child_pid));
+ EXPECT_EQ(SIGTRAP | (PTRACE_EVENT_CLONE << 8), status >> 8);
+ unsigned long eventmsg;
+ ASSERT_THAT(ptrace(PTRACE_GETEVENTMSG, child_pid, 0, &eventmsg),
+ SyscallSucceeds());
+ pid_t const nonleader_tid = eventmsg;
+ pid_t const leader_tid = child_pid;
+
+ // The new thread should be ptraced and in signal-delivery-stop by SIGSTOP due
+ // to PTRACE_O_TRACECLONE.
+ //
+ // Before bf959931ddb88c4e4366e96dd22e68fa0db9527c "wait/ptrace: assume __WALL
+ // if the child is traced" (4.7) , waiting on it requires __WCLONE since, as a
+ // non-leader, its termination signal is 0. After, a standard wait is
+ // sufficient.
+ ASSERT_THAT(waitpid(nonleader_tid, &status, __WCLONE),
+ SyscallSucceedsWithValue(nonleader_tid));
+ EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP)
+ << " status " << status;
+
+ // Resume both child threads.
+ for (pid_t const tid : {leader_tid, nonleader_tid}) {
+ ASSERT_THAT(ptrace(PTRACE_CONT, tid, 0, 0), SyscallSucceeds());
+ }
+
+ // The non-leader child thread should call execve, causing the leader thread
+ // to enter PTRACE_EVENT_EXIT with an apparent exit code of 0. At this point,
+ // the leader has not yet exited, so the non-leader should be blocked in
+ // execve.
+ ASSERT_THAT(waitpid(leader_tid, &status, 0),
+ SyscallSucceedsWithValue(leader_tid));
+ EXPECT_EQ(SIGTRAP | (PTRACE_EVENT_EXIT << 8), status >> 8);
+ ASSERT_THAT(ptrace(PTRACE_GETEVENTMSG, leader_tid, 0, &eventmsg),
+ SyscallSucceeds());
+ EXPECT_TRUE(WIFEXITED(eventmsg) && WEXITSTATUS(eventmsg) == 0)
+ << " eventmsg " << eventmsg;
+ EXPECT_THAT(waitpid(nonleader_tid, &status, __WCLONE | WNOHANG),
+ SyscallSucceedsWithValue(0));
+
+ // Allow the leader to continue exiting. This should allow the non-leader to
+ // complete its execve, causing the original leader to be reaped without
+ // further notice and the non-leader to steal its ID.
+ ASSERT_THAT(ptrace(PTRACE_CONT, leader_tid, 0, 0), SyscallSucceeds());
+ ASSERT_THAT(waitpid(leader_tid, &status, 0),
+ SyscallSucceedsWithValue(leader_tid));
+ if (TraceExec()) {
+ // If PTRACE_O_TRACEEXEC was enabled, the execing thread should be in
+ // PTRACE_EVENT_EXEC-stop, with the event message set to its old thread ID.
+ EXPECT_EQ(SIGTRAP | (PTRACE_EVENT_EXEC << 8), status >> 8);
+ ASSERT_THAT(ptrace(PTRACE_GETEVENTMSG, leader_tid, 0, &eventmsg),
+ SyscallSucceeds());
+ EXPECT_EQ(nonleader_tid, eventmsg);
+ } else {
+ // Otherwise, the execing thread should have received SIGTRAP and should now
+ // be in signal-delivery-stop.
+ EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP)
+ << " status " << status;
+ }
+
+#ifdef __x86_64__
+ {
+ // CS should be 0x33, indicating an 64-bit binary.
+ constexpr uint64_t kAMD64UserCS = 0x33;
+ EXPECT_THAT(ptrace(PTRACE_PEEKUSER, leader_tid,
+ offsetof(struct user_regs_struct, cs), 0),
+ SyscallSucceedsWithValue(kAMD64UserCS));
+ struct user_regs_struct regs = {};
+ ASSERT_THAT(ptrace(PTRACE_GETREGS, leader_tid, 0, &regs),
+ SyscallSucceeds());
+ EXPECT_EQ(kAMD64UserCS, regs.cs);
+ }
+#endif // defined(__x86_64__)
+
+ // PTRACE_O_TRACEEXIT should have been inherited across execve. Send SIGKILL,
+ // which should end the PTRACE_EVENT_EXEC-stop or signal-delivery-stop and
+ // leave the child in PTRACE_EVENT_EXIT-stop.
+ ASSERT_THAT(kill(leader_tid, SIGKILL), SyscallSucceeds());
+ ASSERT_THAT(waitpid(leader_tid, &status, 0),
+ SyscallSucceedsWithValue(leader_tid));
+ EXPECT_EQ(SIGTRAP | (PTRACE_EVENT_EXIT << 8), status >> 8);
+ ASSERT_THAT(ptrace(PTRACE_GETEVENTMSG, leader_tid, 0, &eventmsg),
+ SyscallSucceeds());
+ EXPECT_TRUE(WIFSIGNALED(eventmsg) && WTERMSIG(eventmsg) == SIGKILL)
+ << " eventmsg " << eventmsg;
+
+ // End the PTRACE_EVENT_EXIT stop, allowing the child to exit.
+ ASSERT_THAT(ptrace(PTRACE_CONT, leader_tid, 0, 0), SyscallSucceeds());
+ ASSERT_THAT(waitpid(leader_tid, &status, 0),
+ SyscallSucceedsWithValue(leader_tid));
+ EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
+ << " status " << status;
+}
+
+INSTANTIATE_TEST_CASE_P(TraceExec, PtraceExecveTest, ::testing::Bool());
+
+// This test has expectations on when syscall-enter/exit-stops occur that are
+// violated if saving occurs, since saving interrupts all syscalls, causing
+// premature syscall-exit.
+TEST(PtraceTest,
+ ExitWhenParentIsNotTracer_Syscall_TraceVfork_TraceVforkDone_NoRandomSave) {
+ constexpr int kExitTraceeExitCode = 99;
+
+ pid_t const child_pid = fork();
+ if (child_pid == 0) {
+ // In child process.
+
+ // Block SIGCHLD so it doesn't interrupt wait4.
+ sigset_t mask;
+ TEST_PCHECK(sigemptyset(&mask) == 0);
+ TEST_PCHECK(sigaddset(&mask, SIGCHLD) == 0);
+ TEST_PCHECK(sigprocmask(SIG_SETMASK, &mask, nullptr) == 0);
+ MaybeSave();
+
+ // Enable tracing, then raise SIGSTOP and expect our parent to suppress it.
+ TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0);
+ MaybeSave();
+ RaiseSignal(SIGSTOP);
+ MaybeSave();
+
+ // Spawn a vfork child that exits immediately, and reap it. Don't save
+ // after vfork since the parent expects to see wait4 as the next syscall.
+ pid_t const pid = vfork();
+ if (pid == 0) {
+ _exit(kExitTraceeExitCode);
+ }
+ TEST_PCHECK_MSG(pid > 0, "vfork failed");
+
+ int status;
+ TEST_PCHECK(wait4(pid, &status, 0, nullptr) > 0);
+ MaybeSave();
+ TEST_CHECK(WIFEXITED(status) && WEXITSTATUS(status) == kExitTraceeExitCode);
+ _exit(0);
+ }
+ // In parent process.
+ ASSERT_THAT(child_pid, SyscallSucceeds());
+
+ // Wait for the child to send itself SIGSTOP and enter signal-delivery-stop.
+ int status;
+ ASSERT_THAT(child_pid, SyscallSucceeds());
+ ASSERT_THAT(waitpid(child_pid, &status, 0),
+ SyscallSucceedsWithValue(child_pid));
+ EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP)
+ << " status " << status;
+
+ // Enable PTRACE_O_TRACEVFORK so we can get the ID of the grandchild,
+ // PTRACE_O_TRACEVFORKDONE so we can observe PTRACE_EVENT_VFORK_DONE, and
+ // PTRACE_O_TRACESYSGOOD so syscall-enter/exit-stops are unambiguously
+ // indicated by a stop signal of SIGTRAP|0x80 rather than just SIGTRAP.
+ ASSERT_THAT(ptrace(PTRACE_SETOPTIONS, child_pid, 0,
+ PTRACE_O_TRACEVFORK | PTRACE_O_TRACEVFORKDONE |
+ PTRACE_O_TRACESYSGOOD),
+ SyscallSucceeds());
+
+ // Suppress the SIGSTOP and wait for the child to report PTRACE_EVENT_VFORK.
+ // Get the new process' ID from the event.
+ ASSERT_THAT(ptrace(PTRACE_CONT, child_pid, 0, 0), SyscallSucceeds());
+ ASSERT_THAT(waitpid(child_pid, &status, 0),
+ SyscallSucceedsWithValue(child_pid));
+ EXPECT_EQ(SIGTRAP | (PTRACE_EVENT_VFORK << 8), status >> 8);
+ unsigned long eventmsg;
+ ASSERT_THAT(ptrace(PTRACE_GETEVENTMSG, child_pid, 0, &eventmsg),
+ SyscallSucceeds());
+ pid_t const grandchild_pid = eventmsg;
+
+ // The grandchild should be traced by us and in signal-delivery-stop by
+ // SIGSTOP due to PTRACE_O_TRACEVFORK. This allows us to wait on it even
+ // though we're not its parent.
+ ASSERT_THAT(waitpid(grandchild_pid, &status, 0),
+ SyscallSucceedsWithValue(grandchild_pid));
+ EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP)
+ << " status " << status;
+
+ // Resume the child with PTRACE_SYSCALL. Since the grandchild is still in
+ // signal-delivery-stop, the child should remain in vfork() waiting for the
+ // grandchild to exec or exit.
+ ASSERT_THAT(ptrace(PTRACE_SYSCALL, child_pid, 0, 0), SyscallSucceeds());
+ absl::SleepFor(absl::Seconds(1));
+ ASSERT_THAT(waitpid(child_pid, &status, WNOHANG),
+ SyscallSucceedsWithValue(0));
+
+ // Suppress the grandchild's SIGSTOP and wait for the grandchild to exit. Pass
+ // WNOWAIT to waitid() so that we don't acknowledge the grandchild's exit yet.
+ ASSERT_THAT(ptrace(PTRACE_CONT, grandchild_pid, 0, 0), SyscallSucceeds());
+ siginfo_t siginfo = {};
+ ASSERT_THAT(waitid(P_PID, grandchild_pid, &siginfo, WEXITED | WNOWAIT),
+ SyscallSucceeds());
+ EXPECT_EQ(SIGCHLD, siginfo.si_signo);
+ EXPECT_EQ(CLD_EXITED, siginfo.si_code);
+ EXPECT_EQ(kExitTraceeExitCode, siginfo.si_status);
+ EXPECT_EQ(grandchild_pid, siginfo.si_pid);
+ EXPECT_EQ(getuid(), siginfo.si_uid);
+
+ // The child should now be in PTRACE_EVENT_VFORK_DONE stop. The event
+ // message should still be the grandchild's PID.
+ ASSERT_THAT(waitpid(child_pid, &status, 0),
+ SyscallSucceedsWithValue(child_pid));
+ EXPECT_EQ(SIGTRAP | (PTRACE_EVENT_VFORK_DONE << 8), status >> 8);
+ ASSERT_THAT(ptrace(PTRACE_GETEVENTMSG, child_pid, 0, &eventmsg),
+ SyscallSucceeds());
+ EXPECT_EQ(grandchild_pid, eventmsg);
+
+ // Resume the child with PTRACE_SYSCALL again and expect it to enter
+ // syscall-exit-stop for vfork() or clone(), either of which should return the
+ // grandchild's PID from the syscall. Aside from PTRACE_O_TRACESYSGOOD,
+ // syscall-stops are distinguished from signal-delivery-stop by
+ // PTRACE_GETSIGINFO returning a siginfo for which si_code == SIGTRAP or
+ // SIGTRAP|0x80.
+ ASSERT_THAT(ptrace(PTRACE_SYSCALL, child_pid, 0, 0), SyscallSucceeds());
+ ASSERT_THAT(waitpid(child_pid, &status, 0),
+ SyscallSucceedsWithValue(child_pid));
+ EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == (SIGTRAP | 0x80))
+ << " status " << status;
+ ASSERT_THAT(ptrace(PTRACE_GETSIGINFO, child_pid, 0, &siginfo),
+ SyscallSucceeds());
+ EXPECT_TRUE(siginfo.si_code == SIGTRAP || siginfo.si_code == (SIGTRAP | 0x80))
+ << "si_code = " << siginfo.si_code;
+#ifdef __x86_64__
+ {
+ struct user_regs_struct regs = {};
+ ASSERT_THAT(ptrace(PTRACE_GETREGS, child_pid, 0, &regs), SyscallSucceeds());
+ EXPECT_TRUE(regs.orig_rax == SYS_vfork || regs.orig_rax == SYS_clone)
+ << "orig_rax = " << regs.orig_rax;
+ EXPECT_EQ(grandchild_pid, regs.rax);
+ }
+#endif // defined(__x86_64__)
+
+ // After this point, the child will be making wait4 syscalls that will be
+ // interrupted by saving, so saving is not permitted. Note that this is
+ // explicitly released below once the grandchild exits.
+ DisableSave ds;
+
+ // Resume the child with PTRACE_SYSCALL again and expect it to enter
+ // syscall-enter-stop for wait4().
+ ASSERT_THAT(ptrace(PTRACE_SYSCALL, child_pid, 0, 0), SyscallSucceeds());
+ ASSERT_THAT(waitpid(child_pid, &status, 0),
+ SyscallSucceedsWithValue(child_pid));
+ EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == (SIGTRAP | 0x80))
+ << " status " << status;
+ ASSERT_THAT(ptrace(PTRACE_GETSIGINFO, child_pid, 0, &siginfo),
+ SyscallSucceeds());
+ EXPECT_TRUE(siginfo.si_code == SIGTRAP || siginfo.si_code == (SIGTRAP | 0x80))
+ << "si_code = " << siginfo.si_code;
+#ifdef __x86_64__
+ {
+ EXPECT_THAT(ptrace(PTRACE_PEEKUSER, child_pid,
+ offsetof(struct user_regs_struct, orig_rax), 0),
+ SyscallSucceedsWithValue(SYS_wait4));
+ }
+#endif // defined(__x86_64__)
+
+ // Resume the child with PTRACE_SYSCALL again. Since the grandchild is
+ // waiting for the tracer (us) to acknowledge its exit first, wait4 should
+ // block.
+ ASSERT_THAT(ptrace(PTRACE_SYSCALL, child_pid, 0, 0), SyscallSucceeds());
+ absl::SleepFor(absl::Seconds(1));
+ ASSERT_THAT(waitpid(child_pid, &status, WNOHANG),
+ SyscallSucceedsWithValue(0));
+
+ // Acknowledge the grandchild's exit.
+ ASSERT_THAT(waitpid(grandchild_pid, &status, 0),
+ SyscallSucceedsWithValue(grandchild_pid));
+ ds.reset();
+
+ // Now the child should enter syscall-exit-stop for wait4, returning with the
+ // grandchild's PID.
+ ASSERT_THAT(waitpid(child_pid, &status, 0),
+ SyscallSucceedsWithValue(child_pid));
+ EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == (SIGTRAP | 0x80))
+ << " status " << status;
+#ifdef __x86_64__
+ {
+ struct user_regs_struct regs = {};
+ ASSERT_THAT(ptrace(PTRACE_GETREGS, child_pid, 0, &regs), SyscallSucceeds());
+ EXPECT_EQ(SYS_wait4, regs.orig_rax);
+ EXPECT_EQ(grandchild_pid, regs.rax);
+ }
+#endif // defined(__x86_64__)
+
+ // Detach from the child and wait for it to exit.
+ ASSERT_THAT(ptrace(PTRACE_DETACH, child_pid, 0, 0), SyscallSucceeds());
+ ASSERT_THAT(waitpid(child_pid, &status, 0),
+ SyscallSucceedsWithValue(child_pid));
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << " status " << status;
+}
+
+// These tests requires knowledge of architecture-specific syscall convention.
+#ifdef __x86_64__
+TEST(PtraceTest, Sysemu_PokeUser) {
+ constexpr int kSysemuHelperFirstExitCode = 126;
+ constexpr uint64_t kSysemuInjectedExitGroupReturn = 42;
+
+ pid_t const child_pid = fork();
+ if (child_pid == 0) {
+ // In child process.
+
+ // Enable tracing, then raise SIGSTOP and expect our parent to suppress it.
+ TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0);
+ RaiseSignal(SIGSTOP);
+
+ // Try to exit_group, expecting the tracer to skip the syscall and set its
+ // own return value.
+ int const rv = syscall(SYS_exit_group, kSysemuHelperFirstExitCode);
+ TEST_PCHECK_MSG(rv == kSysemuInjectedExitGroupReturn,
+ "exit_group returned incorrect value");
+
+ _exit(0);
+ }
+ // In parent process.
+ ASSERT_THAT(child_pid, SyscallSucceeds());
+
+ // Wait for the child to send itself SIGSTOP and enter signal-delivery-stop.
+ int status;
+ ASSERT_THAT(waitpid(child_pid, &status, 0),
+ SyscallSucceedsWithValue(child_pid));
+ EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP)
+ << " status " << status;
+
+ // Suppress the SIGSTOP and wait for the child to enter syscall-enter-stop
+ // for its first exit_group syscall. glibc doesn't necessarily define
+ // PTRACE_SYSEMU.
+ constexpr auto kPtraceSysemu = static_cast<__ptrace_request>(31);
+ ASSERT_THAT(ptrace(kPtraceSysemu, child_pid, 0, 0), SyscallSucceeds());
+ ASSERT_THAT(waitpid(child_pid, &status, 0),
+ SyscallSucceedsWithValue(child_pid));
+ EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP)
+ << " status " << status;
+
+ struct user_regs_struct regs = {};
+ ASSERT_THAT(ptrace(PTRACE_GETREGS, child_pid, 0, &regs), SyscallSucceeds());
+ EXPECT_EQ(SYS_exit_group, regs.orig_rax);
+ EXPECT_EQ(-ENOSYS, regs.rax);
+ EXPECT_EQ(kSysemuHelperFirstExitCode, regs.rdi);
+
+ // Replace the exit_group return value, then resume the child, which should
+ // automatically skip the syscall.
+ ASSERT_THAT(
+ ptrace(PTRACE_POKEUSER, child_pid, offsetof(struct user_regs_struct, rax),
+ kSysemuInjectedExitGroupReturn),
+ SyscallSucceeds());
+ ASSERT_THAT(ptrace(PTRACE_DETACH, child_pid, 0, 0), SyscallSucceeds());
+
+ // The child should validate the injected return value and then exit normally.
+ ASSERT_THAT(waitpid(child_pid, &status, 0),
+ SyscallSucceedsWithValue(child_pid));
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << " status " << status;
+}
+
+// This test also cares about syscall-exit-stop.
+TEST(PtraceTest, ERESTART_NoRandomSave) {
+ constexpr int kSigno = SIGUSR1;
+
+ pid_t const child_pid = fork();
+ if (child_pid == 0) {
+ // In child process.
+
+ // Ignore, but unblock, kSigno.
+ struct sigaction sa = {};
+ sa.sa_handler = SIG_IGN;
+ TEST_PCHECK(sigfillset(&sa.sa_mask) == 0);
+ TEST_PCHECK(sigaction(kSigno, &sa, nullptr) == 0);
+ MaybeSave();
+ TEST_PCHECK(sigprocmask(SIG_UNBLOCK, &sa.sa_mask, nullptr) == 0);
+ MaybeSave();
+
+ // Enable tracing, then raise SIGSTOP and expect our parent to suppress it.
+ TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0);
+ RaiseSignal(SIGSTOP);
+
+ // Invoke the pause syscall, which normally should not return until we
+ // receive a signal that "either terminates the process or causes the
+ // invocation of a signal-catching function".
+ pause();
+
+ _exit(0);
+ }
+ ASSERT_THAT(child_pid, SyscallSucceeds());
+
+ // Wait for the child to send itself SIGSTOP and enter signal-delivery-stop.
+ int status;
+ ASSERT_THAT(waitpid(child_pid, &status, 0),
+ SyscallSucceedsWithValue(child_pid));
+ EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP)
+ << " status " << status;
+
+ // After this point, the child's pause syscall will be interrupted by saving,
+ // so saving is not permitted. Note that this is explicitly released below
+ // once the child is stopped.
+ DisableSave ds;
+
+ // Suppress the SIGSTOP and wait for the child to enter syscall-enter-stop for
+ // its pause syscall.
+ ASSERT_THAT(ptrace(PTRACE_SYSCALL, child_pid, 0, 0), SyscallSucceeds());
+ ASSERT_THAT(waitpid(child_pid, &status, 0),
+ SyscallSucceedsWithValue(child_pid));
+ EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP)
+ << " status " << status;
+
+ struct user_regs_struct regs = {};
+ ASSERT_THAT(ptrace(PTRACE_GETREGS, child_pid, 0, &regs), SyscallSucceeds());
+ EXPECT_EQ(SYS_pause, regs.orig_rax);
+ EXPECT_EQ(-ENOSYS, regs.rax);
+
+ // Resume the child with PTRACE_SYSCALL and expect it to block in the pause
+ // syscall.
+ ASSERT_THAT(ptrace(PTRACE_SYSCALL, child_pid, 0, 0), SyscallSucceeds());
+ absl::SleepFor(absl::Seconds(1));
+ ASSERT_THAT(waitpid(child_pid, &status, WNOHANG),
+ SyscallSucceedsWithValue(0));
+
+ // Send the child kSigno, causing it to return ERESTARTNOHAND and enter
+ // syscall-exit-stop from the pause syscall.
+ constexpr int ERESTARTNOHAND = 514;
+ ASSERT_THAT(kill(child_pid, kSigno), SyscallSucceeds());
+ ASSERT_THAT(waitpid(child_pid, &status, 0),
+ SyscallSucceedsWithValue(child_pid));
+ EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP)
+ << " status " << status;
+ ds.reset();
+
+ ASSERT_THAT(ptrace(PTRACE_GETREGS, child_pid, 0, &regs), SyscallSucceeds());
+ EXPECT_EQ(SYS_pause, regs.orig_rax);
+ EXPECT_EQ(-ERESTARTNOHAND, regs.rax);
+
+ // Replace the return value from pause with 0, causing pause to not be
+ // restarted despite kSigno being ignored.
+ ASSERT_THAT(ptrace(PTRACE_POKEUSER, child_pid,
+ offsetof(struct user_regs_struct, rax), 0),
+ SyscallSucceeds());
+
+ // Detach from the child and wait for it to exit.
+ ASSERT_THAT(ptrace(PTRACE_DETACH, child_pid, 0, 0), SyscallSucceeds());
+ ASSERT_THAT(waitpid(child_pid, &status, 0),
+ SyscallSucceedsWithValue(child_pid));
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << " status " << status;
+}
+#endif // defined(__x86_64__)
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/pty.cc b/test/syscalls/linux/pty.cc
new file mode 100644
index 000000000..253aa26ba
--- /dev/null
+++ b/test/syscalls/linux/pty.cc
@@ -0,0 +1,1230 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <linux/major.h>
+#include <poll.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <termios.h>
+#include <unistd.h>
+#include <iostream>
+
+#include "gtest/gtest.h"
+#include "absl/base/macros.h"
+#include "absl/strings/str_cat.h"
+#include "absl/synchronization/notification.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/posix_error.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+using ::testing::AnyOf;
+using ::testing::Contains;
+using ::testing::Eq;
+using ::testing::Not;
+
+// Tests Unix98 pseudoterminals.
+//
+// These tests assume that /dev/ptmx exists and is associated with a devpts
+// filesystem mounted at /dev/pts/. While a Linux distribution could
+// theoretically place those anywhere, glibc expects those locations, so they
+// are effectively fixed.
+
+// Minor device number for an unopened ptmx file.
+constexpr int kPtmxMinor = 2;
+
+// The timeout when polling for data from a pty. When data is written to one end
+// of a pty, Linux asynchronously makes it available to the other end, so we
+// have to wait.
+constexpr absl::Duration kTimeout = absl::Seconds(20);
+
+// The maximum line size in bytes returned per read from a pty file.
+constexpr int kMaxLineSize = 4096;
+
+// glibc defines its own, different, version of struct termios. We care about
+// what the kernel does, not glibc.
+#define KERNEL_NCCS 19
+struct kernel_termios {
+ tcflag_t c_iflag;
+ tcflag_t c_oflag;
+ tcflag_t c_cflag;
+ tcflag_t c_lflag;
+ cc_t c_line;
+ cc_t c_cc[KERNEL_NCCS];
+};
+
+bool operator==(struct kernel_termios const& a,
+ struct kernel_termios const& b) {
+ return memcmp(&a, &b, sizeof(a)) == 0;
+}
+
+// Returns the termios-style control character for the passed character.
+//
+// e.g., for Ctrl-C, i.e., ^C, call ControlCharacter('C').
+//
+// Standard control characters are ASCII bytes 0 through 31.
+constexpr char ControlCharacter(char c) {
+ // A is 1, B is 2, etc.
+ return c - 'A' + 1;
+}
+
+// Returns the printable character the given control character represents.
+constexpr char FromControlCharacter(char c) { return c + 'A' - 1; }
+
+// Returns true if c is a control character.
+//
+// Standard control characters are ASCII bytes 0 through 31.
+constexpr bool IsControlCharacter(char c) { return c <= 31; }
+
+struct Field {
+ const char* name;
+ uint64_t mask;
+ uint64_t value;
+};
+
+// ParseFields returns a std::string representation of value, using the names in
+// fields.
+std::string ParseFields(const Field* fields, size_t len, uint64_t value) {
+ bool first = true;
+ std::string s;
+ for (size_t i = 0; i < len; i++) {
+ const Field f = fields[i];
+ if ((value & f.mask) == f.value) {
+ if (!first) {
+ s += "|";
+ }
+ s += f.name;
+ first = false;
+ value &= ~f.mask;
+ }
+ }
+
+ if (value) {
+ if (!first) {
+ s += "|";
+ }
+ absl::StrAppend(&s, value);
+ }
+
+ return s;
+}
+
+const Field kIflagFields[] = {
+ {"IGNBRK", IGNBRK, IGNBRK}, {"BRKINT", BRKINT, BRKINT},
+ {"IGNPAR", IGNPAR, IGNPAR}, {"PARMRK", PARMRK, PARMRK},
+ {"INPCK", INPCK, INPCK}, {"ISTRIP", ISTRIP, ISTRIP},
+ {"INLCR", INLCR, INLCR}, {"IGNCR", IGNCR, IGNCR},
+ {"ICRNL", ICRNL, ICRNL}, {"IUCLC", IUCLC, IUCLC},
+ {"IXON", IXON, IXON}, {"IXANY", IXANY, IXANY},
+ {"IXOFF", IXOFF, IXOFF}, {"IMAXBEL", IMAXBEL, IMAXBEL},
+ {"IUTF8", IUTF8, IUTF8},
+};
+
+const Field kOflagFields[] = {
+ {"OPOST", OPOST, OPOST}, {"OLCUC", OLCUC, OLCUC},
+ {"ONLCR", ONLCR, ONLCR}, {"OCRNL", OCRNL, OCRNL},
+ {"ONOCR", ONOCR, ONOCR}, {"ONLRET", ONLRET, ONLRET},
+ {"OFILL", OFILL, OFILL}, {"OFDEL", OFDEL, OFDEL},
+ {"NL0", NLDLY, NL0}, {"NL1", NLDLY, NL1},
+ {"CR0", CRDLY, CR0}, {"CR1", CRDLY, CR1},
+ {"CR2", CRDLY, CR2}, {"CR3", CRDLY, CR3},
+ {"TAB0", TABDLY, TAB0}, {"TAB1", TABDLY, TAB1},
+ {"TAB2", TABDLY, TAB2}, {"TAB3", TABDLY, TAB3},
+ {"BS0", BSDLY, BS0}, {"BS1", BSDLY, BS1},
+ {"FF0", FFDLY, FF0}, {"FF1", FFDLY, FF1},
+ {"VT0", VTDLY, VT0}, {"VT1", VTDLY, VT1},
+ {"XTABS", XTABS, XTABS},
+};
+
+#ifndef IBSHIFT
+// Shift from CBAUD to CIBAUD.
+#define IBSHIFT 16
+#endif
+
+const Field kCflagFields[] = {
+ {"B0", CBAUD, B0},
+ {"B50", CBAUD, B50},
+ {"B75", CBAUD, B75},
+ {"B110", CBAUD, B110},
+ {"B134", CBAUD, B134},
+ {"B150", CBAUD, B150},
+ {"B200", CBAUD, B200},
+ {"B300", CBAUD, B300},
+ {"B600", CBAUD, B600},
+ {"B1200", CBAUD, B1200},
+ {"B1800", CBAUD, B1800},
+ {"B2400", CBAUD, B2400},
+ {"B4800", CBAUD, B4800},
+ {"B9600", CBAUD, B9600},
+ {"B19200", CBAUD, B19200},
+ {"B38400", CBAUD, B38400},
+ {"CS5", CSIZE, CS5},
+ {"CS6", CSIZE, CS6},
+ {"CS7", CSIZE, CS7},
+ {"CS8", CSIZE, CS8},
+ {"CSTOPB", CSTOPB, CSTOPB},
+ {"CREAD", CREAD, CREAD},
+ {"PARENB", PARENB, PARENB},
+ {"PARODD", PARODD, PARODD},
+ {"HUPCL", HUPCL, HUPCL},
+ {"CLOCAL", CLOCAL, CLOCAL},
+ {"B57600", CBAUD, B57600},
+ {"B115200", CBAUD, B115200},
+ {"B230400", CBAUD, B230400},
+ {"B460800", CBAUD, B460800},
+ {"B500000", CBAUD, B500000},
+ {"B576000", CBAUD, B576000},
+ {"B921600", CBAUD, B921600},
+ {"B1000000", CBAUD, B1000000},
+ {"B1152000", CBAUD, B1152000},
+ {"B1500000", CBAUD, B1500000},
+ {"B2000000", CBAUD, B2000000},
+ {"B2500000", CBAUD, B2500000},
+ {"B3000000", CBAUD, B3000000},
+ {"B3500000", CBAUD, B3500000},
+ {"B4000000", CBAUD, B4000000},
+ {"CMSPAR", CMSPAR, CMSPAR},
+ {"CRTSCTS", CRTSCTS, CRTSCTS},
+ {"IB0", CIBAUD, B0 << IBSHIFT},
+ {"IB50", CIBAUD, B50 << IBSHIFT},
+ {"IB75", CIBAUD, B75 << IBSHIFT},
+ {"IB110", CIBAUD, B110 << IBSHIFT},
+ {"IB134", CIBAUD, B134 << IBSHIFT},
+ {"IB150", CIBAUD, B150 << IBSHIFT},
+ {"IB200", CIBAUD, B200 << IBSHIFT},
+ {"IB300", CIBAUD, B300 << IBSHIFT},
+ {"IB600", CIBAUD, B600 << IBSHIFT},
+ {"IB1200", CIBAUD, B1200 << IBSHIFT},
+ {"IB1800", CIBAUD, B1800 << IBSHIFT},
+ {"IB2400", CIBAUD, B2400 << IBSHIFT},
+ {"IB4800", CIBAUD, B4800 << IBSHIFT},
+ {"IB9600", CIBAUD, B9600 << IBSHIFT},
+ {"IB19200", CIBAUD, B19200 << IBSHIFT},
+ {"IB38400", CIBAUD, B38400 << IBSHIFT},
+ {"IB57600", CIBAUD, B57600 << IBSHIFT},
+ {"IB115200", CIBAUD, B115200 << IBSHIFT},
+ {"IB230400", CIBAUD, B230400 << IBSHIFT},
+ {"IB460800", CIBAUD, B460800 << IBSHIFT},
+ {"IB500000", CIBAUD, B500000 << IBSHIFT},
+ {"IB576000", CIBAUD, B576000 << IBSHIFT},
+ {"IB921600", CIBAUD, B921600 << IBSHIFT},
+ {"IB1000000", CIBAUD, B1000000 << IBSHIFT},
+ {"IB1152000", CIBAUD, B1152000 << IBSHIFT},
+ {"IB1500000", CIBAUD, B1500000 << IBSHIFT},
+ {"IB2000000", CIBAUD, B2000000 << IBSHIFT},
+ {"IB2500000", CIBAUD, B2500000 << IBSHIFT},
+ {"IB3000000", CIBAUD, B3000000 << IBSHIFT},
+ {"IB3500000", CIBAUD, B3500000 << IBSHIFT},
+ {"IB4000000", CIBAUD, B4000000 << IBSHIFT},
+};
+
+const Field kLflagFields[] = {
+ {"ISIG", ISIG, ISIG}, {"ICANON", ICANON, ICANON},
+ {"XCASE", XCASE, XCASE}, {"ECHO", ECHO, ECHO},
+ {"ECHOE", ECHOE, ECHOE}, {"ECHOK", ECHOK, ECHOK},
+ {"ECHONL", ECHONL, ECHONL}, {"NOFLSH", NOFLSH, NOFLSH},
+ {"TOSTOP", TOSTOP, TOSTOP}, {"ECHOCTL", ECHOCTL, ECHOCTL},
+ {"ECHOPRT", ECHOPRT, ECHOPRT}, {"ECHOKE", ECHOKE, ECHOKE},
+ {"FLUSHO", FLUSHO, FLUSHO}, {"PENDIN", PENDIN, PENDIN},
+ {"IEXTEN", IEXTEN, IEXTEN}, {"EXTPROC", EXTPROC, EXTPROC},
+};
+
+std::string FormatCC(char c) {
+ if (isgraph(c)) {
+ return std::string(1, c);
+ } else if (c == ' ') {
+ return " ";
+ } else if (c == '\t') {
+ return "\\t";
+ } else if (c == '\r') {
+ return "\\r";
+ } else if (c == '\n') {
+ return "\\n";
+ } else if (c == '\0') {
+ return "\\0";
+ } else if (IsControlCharacter(c)) {
+ return absl::StrCat("^", std::string(1, FromControlCharacter(c)));
+ }
+ return absl::StrCat("\\x", absl::Hex(c));
+}
+
+std::ostream& operator<<(std::ostream& os, struct kernel_termios const& a) {
+ os << "{ c_iflag = "
+ << ParseFields(kIflagFields, ABSL_ARRAYSIZE(kIflagFields), a.c_iflag);
+ os << ", c_oflag = "
+ << ParseFields(kOflagFields, ABSL_ARRAYSIZE(kOflagFields), a.c_oflag);
+ os << ", c_cflag = "
+ << ParseFields(kCflagFields, ABSL_ARRAYSIZE(kCflagFields), a.c_cflag);
+ os << ", c_lflag = "
+ << ParseFields(kLflagFields, ABSL_ARRAYSIZE(kLflagFields), a.c_lflag);
+ os << ", c_line = " << a.c_line;
+ os << ", c_cc = { [VINTR] = '" << FormatCC(a.c_cc[VINTR]);
+ os << "', [VQUIT] = '" << FormatCC(a.c_cc[VQUIT]);
+ os << "', [VERASE] = '" << FormatCC(a.c_cc[VERASE]);
+ os << "', [VKILL] = '" << FormatCC(a.c_cc[VKILL]);
+ os << "', [VEOF] = '" << FormatCC(a.c_cc[VEOF]);
+ os << "', [VTIME] = '" << static_cast<int>(a.c_cc[VTIME]);
+ os << "', [VMIN] = " << static_cast<int>(a.c_cc[VMIN]);
+ os << ", [VSWTC] = '" << FormatCC(a.c_cc[VSWTC]);
+ os << "', [VSTART] = '" << FormatCC(a.c_cc[VSTART]);
+ os << "', [VSTOP] = '" << FormatCC(a.c_cc[VSTOP]);
+ os << "', [VSUSP] = '" << FormatCC(a.c_cc[VSUSP]);
+ os << "', [VEOL] = '" << FormatCC(a.c_cc[VEOL]);
+ os << "', [VREPRINT] = '" << FormatCC(a.c_cc[VREPRINT]);
+ os << "', [VDISCARD] = '" << FormatCC(a.c_cc[VDISCARD]);
+ os << "', [VWERASE] = '" << FormatCC(a.c_cc[VWERASE]);
+ os << "', [VLNEXT] = '" << FormatCC(a.c_cc[VLNEXT]);
+ os << "', [VEOL2] = '" << FormatCC(a.c_cc[VEOL2]);
+ os << "'}";
+ return os;
+}
+
+// Return the default termios settings for a new terminal.
+struct kernel_termios DefaultTermios() {
+ struct kernel_termios t = {};
+ t.c_iflag = IXON | ICRNL;
+ t.c_oflag = OPOST | ONLCR;
+ t.c_cflag = B38400 | CSIZE | CS8 | CREAD;
+ t.c_lflag = ISIG | ICANON | ECHO | ECHOE | ECHOK | ECHOCTL | ECHOKE | IEXTEN;
+ t.c_line = 0;
+ t.c_cc[VINTR] = ControlCharacter('C');
+ t.c_cc[VQUIT] = ControlCharacter('\\');
+ t.c_cc[VERASE] = '\x7f';
+ t.c_cc[VKILL] = ControlCharacter('U');
+ t.c_cc[VEOF] = ControlCharacter('D');
+ t.c_cc[VTIME] = '\0';
+ t.c_cc[VMIN] = 1;
+ t.c_cc[VSWTC] = '\0';
+ t.c_cc[VSTART] = ControlCharacter('Q');
+ t.c_cc[VSTOP] = ControlCharacter('S');
+ t.c_cc[VSUSP] = ControlCharacter('Z');
+ t.c_cc[VEOL] = '\0';
+ t.c_cc[VREPRINT] = ControlCharacter('R');
+ t.c_cc[VDISCARD] = ControlCharacter('O');
+ t.c_cc[VWERASE] = ControlCharacter('W');
+ t.c_cc[VLNEXT] = ControlCharacter('V');
+ t.c_cc[VEOL2] = '\0';
+ return t;
+}
+
+// PollAndReadFd tries to read count bytes from buf within timeout.
+//
+// Returns a partial read if some bytes were read.
+//
+// fd must be non-blocking.
+PosixErrorOr<size_t> PollAndReadFd(int fd, void* buf, size_t count,
+ absl::Duration timeout) {
+ absl::Time end = absl::Now() + timeout;
+
+ size_t completed = 0;
+ absl::Duration remaining;
+ while ((remaining = end - absl::Now()) > absl::ZeroDuration()) {
+ struct pollfd pfd = {fd, POLLIN, 0};
+ int ret = RetryEINTR(poll)(&pfd, 1, absl::ToInt64Milliseconds(remaining));
+ if (ret < 0) {
+ return PosixError(errno, "poll failed");
+ } else if (ret == 0) {
+ // Timed out.
+ continue;
+ } else if (ret != 1) {
+ return PosixError(EINVAL, absl::StrCat("Bad poll ret ", ret));
+ }
+
+ ssize_t n =
+ ReadFd(fd, static_cast<char*>(buf) + completed, count - completed);
+ if (n < 0) {
+ return PosixError(errno, "read failed");
+ }
+ completed += n;
+ if (completed >= count) {
+ return completed;
+ }
+ }
+
+ if (completed) {
+ return completed;
+ }
+ return PosixError(ETIMEDOUT, "Poll timed out");
+}
+
+// Opens the slave end of the passed master as R/W and nonblocking.
+PosixErrorOr<FileDescriptor> OpenSlave(const FileDescriptor& master) {
+ // Get pty index.
+ int n;
+ int ret = ioctl(master.get(), TIOCGPTN, &n);
+ if (ret < 0) {
+ return PosixError(errno, "ioctl(TIOCGPTN) failed");
+ }
+
+ // Unlock pts.
+ int unlock = 0;
+ ret = ioctl(master.get(), TIOCSPTLCK, &unlock);
+ if (ret < 0) {
+ return PosixError(errno, "ioctl(TIOSPTLCK) failed");
+ }
+
+ return Open(absl::StrCat("/dev/pts/", n), O_RDWR | O_NONBLOCK);
+}
+
+TEST(BasicPtyTest, StatUnopenedMaster) {
+ struct stat s;
+ ASSERT_THAT(stat("/dev/ptmx", &s), SyscallSucceeds());
+
+ EXPECT_EQ(s.st_rdev, makedev(TTYAUX_MAJOR, kPtmxMinor));
+ EXPECT_EQ(s.st_size, 0);
+ EXPECT_EQ(s.st_blocks, 0);
+
+ // ptmx attached to a specific devpts mount uses block size 1024. See
+ // fs/devpts/inode.c:devpts_fill_super.
+ //
+ // The global ptmx device uses the block size of the filesystem it is created
+ // on (which is usually 4096 for disk filesystems).
+ EXPECT_THAT(s.st_blksize, AnyOf(Eq(1024), Eq(4096)));
+}
+
+// Waits for count bytes to be readable from fd. Unlike poll, which can return
+// before all data is moved into a pty's read buffer, this function waits for
+// all count bytes to become readable.
+PosixErrorOr<int> WaitUntilReceived(int fd, int count) {
+ int buffered = -1;
+ absl::Duration remaining;
+ absl::Time end = absl::Now() + kTimeout;
+ while ((remaining = end - absl::Now()) > absl::ZeroDuration()) {
+ if (ioctl(fd, FIONREAD, &buffered) < 0) {
+ return PosixError(errno, "failed FIONREAD ioctl");
+ }
+ if (buffered >= count) {
+ return buffered;
+ }
+ absl::SleepFor(absl::Milliseconds(500));
+ }
+ return PosixError(
+ ETIMEDOUT,
+ absl::StrFormat(
+ "FIONREAD timed out, receiving only %d of %d expected bytes",
+ buffered, count));
+}
+
+// Verifies that there is nothing left to read from fd.
+void ExpectFinished(const FileDescriptor& fd) {
+ // Nothing more to read.
+ char c;
+ EXPECT_THAT(ReadFd(fd.get(), &c, 1), SyscallFailsWithErrno(EAGAIN));
+}
+
+// Verifies that we can read expected bytes from fd into buf.
+void ExpectReadable(const FileDescriptor& fd, int expected, char* buf) {
+ size_t n = ASSERT_NO_ERRNO_AND_VALUE(
+ PollAndReadFd(fd.get(), buf, expected, kTimeout));
+ EXPECT_EQ(expected, n);
+}
+
+TEST(BasicPtyTest, OpenMasterSlave) {
+ FileDescriptor master = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR));
+ FileDescriptor slave = ASSERT_NO_ERRNO_AND_VALUE(OpenSlave(master));
+}
+
+// The slave entry in /dev/pts/ disappears when the master is closed, even if
+// the slave is still open.
+TEST(BasicPtyTest, SlaveEntryGoneAfterMasterClose) {
+ FileDescriptor master = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR));
+ FileDescriptor slave = ASSERT_NO_ERRNO_AND_VALUE(OpenSlave(master));
+
+ // Get pty index.
+ int index = -1;
+ ASSERT_THAT(ioctl(master.get(), TIOCGPTN, &index), SyscallSucceeds());
+
+ std::string path = absl::StrCat("/dev/pts/", index);
+
+ struct stat st;
+ EXPECT_THAT(stat(path.c_str(), &st), SyscallSucceeds());
+
+ master.reset();
+
+ EXPECT_THAT(stat(path.c_str(), &st), SyscallFailsWithErrno(ENOENT));
+}
+
+TEST(BasicPtyTest, Getdents) {
+ FileDescriptor master1 = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR));
+ int index1 = -1;
+ ASSERT_THAT(ioctl(master1.get(), TIOCGPTN, &index1), SyscallSucceeds());
+ FileDescriptor slave1 = ASSERT_NO_ERRNO_AND_VALUE(OpenSlave(master1));
+
+ FileDescriptor master2 = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR));
+ int index2 = -1;
+ ASSERT_THAT(ioctl(master2.get(), TIOCGPTN, &index2), SyscallSucceeds());
+ FileDescriptor slave2 = ASSERT_NO_ERRNO_AND_VALUE(OpenSlave(master2));
+
+ // The directory contains ptmx, index1, and index2. (Plus any additional PTYs
+ // unrelated to this test.)
+
+ std::vector<std::string> contents =
+ ASSERT_NO_ERRNO_AND_VALUE(ListDir("/dev/pts/", true));
+ EXPECT_THAT(contents, Contains(absl::StrCat(index1)));
+ EXPECT_THAT(contents, Contains(absl::StrCat(index2)));
+
+ master2.reset();
+
+ // The directory contains ptmx and index1, but not index2 since the master is
+ // closed. (Plus any additional PTYs unrelated to this test.)
+
+ contents = ASSERT_NO_ERRNO_AND_VALUE(ListDir("/dev/pts/", true));
+ EXPECT_THAT(contents, Contains(absl::StrCat(index1)));
+ EXPECT_THAT(contents, Not(Contains(absl::StrCat(index2))));
+
+ // N.B. devpts supports legacy "single-instance" mode and new "multi-instance"
+ // mode. In legacy mode, devpts does not contain a "ptmx" device (the distro
+ // must use mknod to create it somewhere, presumably /dev/ptmx).
+ // Multi-instance mode does include a "ptmx" device tied to that mount.
+ //
+ // We don't check for the presence or absence of "ptmx", as distros vary in
+ // their usage of the two modes.
+}
+
+class PtyTest : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ master_ = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/ptmx", O_RDWR | O_NONBLOCK));
+ slave_ = ASSERT_NO_ERRNO_AND_VALUE(OpenSlave(master_));
+ }
+
+ void DisableCanonical() {
+ struct kernel_termios t = {};
+ EXPECT_THAT(ioctl(slave_.get(), TCGETS, &t), SyscallSucceeds());
+ t.c_lflag &= ~ICANON;
+ EXPECT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds());
+ }
+
+ void EnableCanonical() {
+ struct kernel_termios t = {};
+ EXPECT_THAT(ioctl(slave_.get(), TCGETS, &t), SyscallSucceeds());
+ t.c_lflag |= ICANON;
+ EXPECT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds());
+ }
+
+ // Master and slave ends of the PTY. Non-blocking.
+ FileDescriptor master_;
+ FileDescriptor slave_;
+};
+
+// Master to slave sanity test.
+TEST_F(PtyTest, WriteMasterToSlave) {
+ // N.B. by default, the slave reads nothing until the master writes a newline.
+ constexpr char kBuf[] = "hello\n";
+
+ EXPECT_THAT(WriteFd(master_.get(), kBuf, sizeof(kBuf) - 1),
+ SyscallSucceedsWithValue(sizeof(kBuf) - 1));
+
+ // Linux moves data from the master to the slave via async work scheduled via
+ // tty_flip_buffer_push. Since it is asynchronous, the data may not be
+ // available for reading immediately. Instead we must poll and assert that it
+ // becomes available "soon".
+
+ char buf[sizeof(kBuf)] = {};
+ ExpectReadable(slave_, sizeof(buf) - 1, buf);
+
+ EXPECT_EQ(memcmp(buf, kBuf, sizeof(kBuf)), 0);
+}
+
+// Slave to master sanity test.
+TEST_F(PtyTest, WriteSlaveToMaster) {
+ // N.B. by default, the master reads nothing until the slave writes a newline,
+ // and the master gets a carriage return.
+ constexpr char kInput[] = "hello\n";
+ constexpr char kExpected[] = "hello\r\n";
+
+ EXPECT_THAT(WriteFd(slave_.get(), kInput, sizeof(kInput) - 1),
+ SyscallSucceedsWithValue(sizeof(kInput) - 1));
+
+ // Linux moves data from the master to the slave via async work scheduled via
+ // tty_flip_buffer_push. Since it is asynchronous, the data may not be
+ // available for reading immediately. Instead we must poll and assert that it
+ // becomes available "soon".
+
+ char buf[sizeof(kExpected)] = {};
+ ExpectReadable(master_, sizeof(buf) - 1, buf);
+
+ EXPECT_EQ(memcmp(buf, kExpected, sizeof(kExpected)), 0);
+}
+
+// Both the master and slave report the standard default termios settings.
+//
+// Note that TCGETS on the master actually redirects to the slave (see comment
+// on MasterTermiosUnchangable).
+TEST_F(PtyTest, DefaultTermios) {
+ struct kernel_termios t = {};
+ EXPECT_THAT(ioctl(slave_.get(), TCGETS, &t), SyscallSucceeds());
+ EXPECT_EQ(t, DefaultTermios());
+
+ EXPECT_THAT(ioctl(master_.get(), TCGETS, &t), SyscallSucceeds());
+ EXPECT_EQ(t, DefaultTermios());
+}
+
+// Changing termios from the master actually affects the slave.
+//
+// TCSETS on the master actually redirects to the slave (see comment on
+// MasterTermiosUnchangable).
+TEST_F(PtyTest, TermiosAffectsSlave) {
+ struct kernel_termios master_termios = {};
+ EXPECT_THAT(ioctl(master_.get(), TCGETS, &master_termios), SyscallSucceeds());
+ master_termios.c_lflag ^= ICANON;
+ EXPECT_THAT(ioctl(master_.get(), TCSETS, &master_termios), SyscallSucceeds());
+
+ struct kernel_termios slave_termios = {};
+ EXPECT_THAT(ioctl(slave_.get(), TCGETS, &slave_termios), SyscallSucceeds());
+ EXPECT_EQ(master_termios, slave_termios);
+}
+
+// The master end of the pty has termios:
+//
+// struct kernel_termios t = {
+// .c_iflag = 0;
+// .c_oflag = 0;
+// .c_cflag = B38400 | CS8 | CREAD;
+// .c_lflag = 0;
+// .c_cc = /* same as DefaultTermios */
+// }
+//
+// (From drivers/tty/pty.c:unix98_pty_init)
+//
+// All termios control ioctls on the master actually redirect to the slave
+// (drivers/tty/tty_ioctl.c:tty_mode_ioctl), making it impossible to change the
+// master termios.
+//
+// Verify this by setting ICRNL (which rewrites input \r to \n) and verify that
+// it has no effect on the master.
+TEST_F(PtyTest, MasterTermiosUnchangable) {
+ char c = '\r';
+ ASSERT_THAT(WriteFd(slave_.get(), &c, 1), SyscallSucceedsWithValue(1));
+
+ ExpectReadable(master_, 1, &c);
+ EXPECT_EQ(c, '\r'); // ICRNL had no effect!
+
+ ExpectFinished(master_);
+}
+
+// ICRNL rewrites input \r to \n.
+TEST_F(PtyTest, TermiosICRNL) {
+ struct kernel_termios t = DefaultTermios();
+ t.c_iflag |= ICRNL;
+ t.c_lflag &= ~ICANON; // for byte-by-byte reading.
+ ASSERT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds());
+
+ char c = '\r';
+ ASSERT_THAT(WriteFd(master_.get(), &c, 1), SyscallSucceedsWithValue(1));
+
+ ExpectReadable(slave_, 1, &c);
+ EXPECT_EQ(c, '\n');
+
+ ExpectFinished(slave_);
+}
+
+// ONLCR rewrites output \n to \r\n.
+TEST_F(PtyTest, TermiosONLCR) {
+ struct kernel_termios t = DefaultTermios();
+ t.c_oflag |= ONLCR;
+ t.c_lflag &= ~ICANON; // for byte-by-byte reading.
+ ASSERT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds());
+
+ char c = '\n';
+ ASSERT_THAT(WriteFd(slave_.get(), &c, 1), SyscallSucceedsWithValue(1));
+
+ // Extra byte for NUL for EXPECT_STREQ.
+ char buf[3] = {};
+ ExpectReadable(master_, 2, buf);
+ EXPECT_STREQ(buf, "\r\n");
+
+ ExpectFinished(slave_);
+}
+
+TEST_F(PtyTest, TermiosIGNCR) {
+ struct kernel_termios t = DefaultTermios();
+ t.c_iflag |= IGNCR;
+ t.c_lflag &= ~ICANON; // for byte-by-byte reading.
+ ASSERT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds());
+
+ char c = '\r';
+ ASSERT_THAT(WriteFd(master_.get(), &c, 1), SyscallSucceedsWithValue(1));
+
+ // Nothing to read.
+ ASSERT_THAT(PollAndReadFd(slave_.get(), &c, 1, kTimeout),
+ PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out")));
+}
+
+// Test that we can successfully poll for readable data from the slave.
+TEST_F(PtyTest, TermiosPollSlave) {
+ struct kernel_termios t = DefaultTermios();
+ t.c_iflag |= IGNCR;
+ t.c_lflag &= ~ICANON; // for byte-by-byte reading.
+ ASSERT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds());
+
+ absl::Notification notify;
+ int sfd = slave_.get();
+ ScopedThread th([sfd, &notify]() {
+ notify.Notify();
+
+ // Poll on the reader fd with POLLIN event.
+ struct pollfd poll_fd = {sfd, POLLIN, 0};
+ EXPECT_THAT(
+ RetryEINTR(poll)(&poll_fd, 1, absl::ToInt64Milliseconds(kTimeout)),
+ SyscallSucceedsWithValue(1));
+
+ // Should trigger POLLIN event.
+ EXPECT_EQ(poll_fd.revents & POLLIN, POLLIN);
+ });
+
+ notify.WaitForNotification();
+ // Sleep ensures that poll begins waiting before we write to the FD.
+ absl::SleepFor(absl::Seconds(1));
+
+ char s[] = "foo\n";
+ ASSERT_THAT(WriteFd(master_.get(), s, strlen(s) + 1), SyscallSucceeds());
+}
+
+// Test that we can successfully poll for readable data from the master.
+TEST_F(PtyTest, TermiosPollMaster) {
+ struct kernel_termios t = DefaultTermios();
+ t.c_iflag |= IGNCR;
+ t.c_lflag &= ~ICANON; // for byte-by-byte reading.
+ ASSERT_THAT(ioctl(master_.get(), TCSETS, &t), SyscallSucceeds());
+
+ absl::Notification notify;
+ int mfd = master_.get();
+ ScopedThread th([mfd, &notify]() {
+ notify.Notify();
+
+ // Poll on the reader fd with POLLIN event.
+ struct pollfd poll_fd = {mfd, POLLIN, 0};
+ EXPECT_THAT(
+ RetryEINTR(poll)(&poll_fd, 1, absl::ToInt64Milliseconds(kTimeout)),
+ SyscallSucceedsWithValue(1));
+
+ // Should trigger POLLIN event.
+ EXPECT_EQ(poll_fd.revents & POLLIN, POLLIN);
+ });
+
+ notify.WaitForNotification();
+ // Sleep ensures that poll begins waiting before we write to the FD.
+ absl::SleepFor(absl::Seconds(1));
+
+ char s[] = "foo\n";
+ ASSERT_THAT(WriteFd(slave_.get(), s, strlen(s) + 1), SyscallSucceeds());
+}
+
+TEST_F(PtyTest, TermiosINLCR) {
+ struct kernel_termios t = DefaultTermios();
+ t.c_iflag |= INLCR;
+ t.c_lflag &= ~ICANON; // for byte-by-byte reading.
+ ASSERT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds());
+
+ char c = '\n';
+ ASSERT_THAT(WriteFd(master_.get(), &c, 1), SyscallSucceedsWithValue(1));
+
+ ExpectReadable(slave_, 1, &c);
+ EXPECT_EQ(c, '\r');
+
+ ExpectFinished(slave_);
+}
+
+TEST_F(PtyTest, TermiosONOCR) {
+ struct kernel_termios t = DefaultTermios();
+ t.c_oflag |= ONOCR;
+ t.c_lflag &= ~ICANON; // for byte-by-byte reading.
+ ASSERT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds());
+
+ // The terminal is at column 0, so there should be no CR to read.
+ char c = '\r';
+ ASSERT_THAT(WriteFd(slave_.get(), &c, 1), SyscallSucceedsWithValue(1));
+
+ // Nothing to read.
+ ASSERT_THAT(PollAndReadFd(master_.get(), &c, 1, kTimeout),
+ PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out")));
+
+ // This time the column is greater than 0, so we should be able to read the CR
+ // out of the other end.
+ constexpr char kInput[] = "foo\r";
+ constexpr int kInputSize = sizeof(kInput) - 1;
+ ASSERT_THAT(WriteFd(slave_.get(), kInput, kInputSize),
+ SyscallSucceedsWithValue(kInputSize));
+
+ char buf[kInputSize] = {};
+ ExpectReadable(master_, kInputSize, buf);
+
+ EXPECT_EQ(memcmp(buf, kInput, kInputSize), 0);
+
+ ExpectFinished(master_);
+
+ // Terminal should be at column 0 again, so no CR can be read.
+ ASSERT_THAT(WriteFd(slave_.get(), &c, 1), SyscallSucceedsWithValue(1));
+
+ // Nothing to read.
+ ASSERT_THAT(PollAndReadFd(master_.get(), &c, 1, kTimeout),
+ PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out")));
+}
+
+TEST_F(PtyTest, TermiosOCRNL) {
+ struct kernel_termios t = DefaultTermios();
+ t.c_oflag |= OCRNL;
+ t.c_lflag &= ~ICANON; // for byte-by-byte reading.
+ ASSERT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds());
+
+ // The terminal is at column 0, so there should be no CR to read.
+ char c = '\r';
+ ASSERT_THAT(WriteFd(slave_.get(), &c, 1), SyscallSucceedsWithValue(1));
+
+ ExpectReadable(master_, 1, &c);
+ EXPECT_EQ(c, '\n');
+
+ ExpectFinished(master_);
+}
+
+// Tests that VEOL is disabled when we start, and that we can set it to enable
+// it.
+TEST_F(PtyTest, VEOLTermination) {
+ // Write a few bytes ending with '\0', and confirm that we can't read.
+ constexpr char kInput[] = "hello";
+ ASSERT_THAT(WriteFd(master_.get(), kInput, sizeof(kInput)),
+ SyscallSucceedsWithValue(sizeof(kInput)));
+ char buf[sizeof(kInput)] = {};
+ ASSERT_THAT(PollAndReadFd(slave_.get(), buf, sizeof(kInput), kTimeout),
+ PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out")));
+
+ // Set the EOL character to '=' and write it.
+ constexpr char delim = '=';
+ struct kernel_termios t = DefaultTermios();
+ t.c_cc[VEOL] = delim;
+ ASSERT_THAT(ioctl(slave_.get(), TCSETS, &t), SyscallSucceeds());
+ ASSERT_THAT(WriteFd(master_.get(), &delim, 1), SyscallSucceedsWithValue(1));
+
+ // Now we can read, as sending EOL caused the line to become available.
+ ExpectReadable(slave_, sizeof(kInput), buf);
+ EXPECT_EQ(memcmp(buf, kInput, sizeof(kInput)), 0);
+
+ ExpectReadable(slave_, 1, buf);
+ EXPECT_EQ(buf[0], '=');
+
+ ExpectFinished(slave_);
+}
+
+// Tests that we can write more than the 4096 character limit, then a
+// terminating character, then read out just the first 4095 bytes plus the
+// terminator.
+TEST_F(PtyTest, CanonBigWrite) {
+ constexpr int kWriteLen = kMaxLineSize + 4;
+ char input[kWriteLen];
+ memset(input, 'M', kWriteLen - 1);
+ input[kWriteLen - 1] = '\n';
+ ASSERT_THAT(WriteFd(master_.get(), input, kWriteLen),
+ SyscallSucceedsWithValue(kWriteLen));
+
+ // We can read the line.
+ char buf[kMaxLineSize] = {};
+ ExpectReadable(slave_, kMaxLineSize, buf);
+
+ ExpectFinished(slave_);
+}
+
+// Tests that data written in canonical mode can be read immediately once
+// switched to noncanonical mode.
+TEST_F(PtyTest, SwitchCanonToNoncanon) {
+ // Write a few bytes without a terminating character, switch to noncanonical
+ // mode, and read them.
+ constexpr char kInput[] = "hello";
+ ASSERT_THAT(WriteFd(master_.get(), kInput, sizeof(kInput)),
+ SyscallSucceedsWithValue(sizeof(kInput)));
+
+ // Nothing available yet.
+ char buf[sizeof(kInput)] = {};
+ ASSERT_THAT(PollAndReadFd(slave_.get(), buf, sizeof(kInput), kTimeout),
+ PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out")));
+
+ DisableCanonical();
+
+ ExpectReadable(slave_, sizeof(kInput), buf);
+ EXPECT_STREQ(buf, kInput);
+
+ ExpectFinished(slave_);
+}
+
+TEST_F(PtyTest, SwitchCanonToNonCanonNewline) {
+ // Write a few bytes with a terminating character.
+ constexpr char kInput[] = "hello\n";
+ ASSERT_THAT(WriteFd(master_.get(), kInput, sizeof(kInput)),
+ SyscallSucceedsWithValue(sizeof(kInput)));
+
+ DisableCanonical();
+
+ // We can read the line.
+ char buf[sizeof(kInput)] = {};
+ ExpectReadable(slave_, sizeof(kInput), buf);
+ EXPECT_STREQ(buf, kInput);
+
+ ExpectFinished(slave_);
+}
+
+TEST_F(PtyTest, SwitchNoncanonToCanonNewlineBig) {
+ DisableCanonical();
+
+ // Write more than the maximum line size, then write a delimiter.
+ constexpr int kWriteLen = 4100;
+ char input[kWriteLen];
+ memset(input, 'M', kWriteLen);
+ ASSERT_THAT(WriteFd(master_.get(), input, kWriteLen),
+ SyscallSucceedsWithValue(kWriteLen));
+ // Wait for the input queue to fill.
+ ASSERT_NO_ERRNO(WaitUntilReceived(slave_.get(), kMaxLineSize - 1));
+ constexpr char delim = '\n';
+ ASSERT_THAT(WriteFd(master_.get(), &delim, 1), SyscallSucceedsWithValue(1));
+
+ EnableCanonical();
+
+ // We can read the line.
+ char buf[kMaxLineSize] = {};
+ ExpectReadable(slave_, kMaxLineSize - 1, buf);
+
+ // We can also read the remaining characters.
+ ExpectReadable(slave_, 6, buf);
+
+ ExpectFinished(slave_);
+}
+
+TEST_F(PtyTest, SwitchNoncanonToCanonNoNewline) {
+ DisableCanonical();
+
+ // Write a few bytes without a terminating character.
+ // mode, and read them.
+ constexpr char kInput[] = "hello";
+ ASSERT_THAT(WriteFd(master_.get(), kInput, sizeof(kInput) - 1),
+ SyscallSucceedsWithValue(sizeof(kInput) - 1));
+
+ ASSERT_NO_ERRNO(WaitUntilReceived(slave_.get(), sizeof(kInput) - 1));
+ EnableCanonical();
+
+ // We can read the line.
+ char buf[sizeof(kInput)] = {};
+ ExpectReadable(slave_, sizeof(kInput) - 1, buf);
+ EXPECT_STREQ(buf, kInput);
+
+ ExpectFinished(slave_);
+}
+
+TEST_F(PtyTest, SwitchNoncanonToCanonNoNewlineBig) {
+ DisableCanonical();
+
+ // Write a few bytes without a terminating character.
+ // mode, and read them.
+ constexpr int kWriteLen = 4100;
+ char input[kWriteLen];
+ memset(input, 'M', kWriteLen);
+ ASSERT_THAT(WriteFd(master_.get(), input, kWriteLen),
+ SyscallSucceedsWithValue(kWriteLen));
+
+ ASSERT_NO_ERRNO(WaitUntilReceived(slave_.get(), kMaxLineSize - 1));
+ EnableCanonical();
+
+ // We can read the line.
+ char buf[kMaxLineSize] = {};
+ ExpectReadable(slave_, kMaxLineSize - 1, buf);
+
+ ExpectFinished(slave_);
+}
+
+// Tests that we can write over the 4095 noncanonical limit, then read out
+// everything.
+TEST_F(PtyTest, NoncanonBigWrite) {
+ DisableCanonical();
+
+ // Write well over the 4095 internal buffer limit.
+ constexpr char kInput = 'M';
+ constexpr int kInputSize = kMaxLineSize * 2;
+ for (int i = 0; i < kInputSize; i++) {
+ // This makes too many syscalls for save/restore.
+ const DisableSave ds;
+ ASSERT_THAT(WriteFd(master_.get(), &kInput, sizeof(kInput)),
+ SyscallSucceedsWithValue(sizeof(kInput)));
+ }
+
+ // We should be able to read out everything. Sleep a bit so that Linux has a
+ // chance to move data from the master to the slave.
+ ASSERT_NO_ERRNO(WaitUntilReceived(slave_.get(), kMaxLineSize - 1));
+ for (int i = 0; i < kInputSize; i++) {
+ // This makes too many syscalls for save/restore.
+ const DisableSave ds;
+ char c;
+ ExpectReadable(slave_, 1, &c);
+ ASSERT_EQ(c, kInput);
+ }
+
+ ExpectFinished(slave_);
+}
+
+// ICANON doesn't make input available until a line delimiter is typed.
+//
+// Test newline.
+TEST_F(PtyTest, TermiosICANONNewline) {
+ char input[3] = {'a', 'b', 'c'};
+ ASSERT_THAT(WriteFd(master_.get(), input, sizeof(input)),
+ SyscallSucceedsWithValue(sizeof(input)));
+
+ // Extra bytes for newline (written later) and NUL for EXPECT_STREQ.
+ char buf[5] = {};
+
+ // Nothing available yet.
+ ASSERT_THAT(PollAndReadFd(slave_.get(), buf, sizeof(input), kTimeout),
+ PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out")));
+
+ char delim = '\n';
+ ASSERT_THAT(WriteFd(master_.get(), &delim, 1), SyscallSucceedsWithValue(1));
+
+ // Now it is available.
+ ASSERT_NO_ERRNO(WaitUntilReceived(slave_.get(), sizeof(input) + 1));
+ ExpectReadable(slave_, sizeof(input) + 1, buf);
+ EXPECT_STREQ(buf, "abc\n");
+
+ ExpectFinished(slave_);
+}
+
+// ICANON doesn't make input available until a line delimiter is typed.
+//
+// Test EOF (^D).
+TEST_F(PtyTest, TermiosICANONEOF) {
+ char input[3] = {'a', 'b', 'c'};
+ ASSERT_THAT(WriteFd(master_.get(), input, sizeof(input)),
+ SyscallSucceedsWithValue(sizeof(input)));
+
+ // Extra byte for NUL for EXPECT_STREQ.
+ char buf[4] = {};
+
+ // Nothing available yet.
+ ASSERT_THAT(PollAndReadFd(slave_.get(), buf, sizeof(input), kTimeout),
+ PosixErrorIs(ETIMEDOUT, ::testing::StrEq("Poll timed out")));
+ char delim = ControlCharacter('D');
+ ASSERT_THAT(WriteFd(master_.get(), &delim, 1), SyscallSucceedsWithValue(1));
+
+ // Now it is available. Note that ^D is not included.
+ ExpectReadable(slave_, sizeof(input), buf);
+ EXPECT_STREQ(buf, "abc");
+
+ ExpectFinished(slave_);
+}
+
+// ICANON limits us to 4096 bytes including a terminating character. Anything
+// after and 4095th character is discarded (although still processed for
+// signals and echoing).
+TEST_F(PtyTest, CanonDiscard) {
+ constexpr char kInput = 'M';
+ constexpr int kInputSize = 4100;
+ constexpr int kIter = 3;
+
+ // A few times write more than the 4096 character maximum, then a newline.
+ constexpr char delim = '\n';
+ for (int i = 0; i < kIter; i++) {
+ // This makes too many syscalls for save/restore.
+ const DisableSave ds;
+ for (int i = 0; i < kInputSize; i++) {
+ ASSERT_THAT(WriteFd(master_.get(), &kInput, sizeof(kInput)),
+ SyscallSucceedsWithValue(sizeof(kInput)));
+ }
+ ASSERT_THAT(WriteFd(master_.get(), &delim, 1), SyscallSucceedsWithValue(1));
+ }
+
+ // There should be multiple truncated lines available to read.
+ for (int i = 0; i < kIter; i++) {
+ char buf[kInputSize] = {};
+ ExpectReadable(slave_, kMaxLineSize, buf);
+ EXPECT_EQ(buf[kMaxLineSize - 1], delim);
+ EXPECT_EQ(buf[kMaxLineSize - 2], kInput);
+ }
+
+ ExpectFinished(slave_);
+}
+
+TEST_F(PtyTest, CanonMultiline) {
+ constexpr char kInput1[] = "GO\n";
+ constexpr char kInput2[] = "BLUE\n";
+
+ // Write both lines.
+ ASSERT_THAT(WriteFd(master_.get(), kInput1, sizeof(kInput1) - 1),
+ SyscallSucceedsWithValue(sizeof(kInput1) - 1));
+ ASSERT_THAT(WriteFd(master_.get(), kInput2, sizeof(kInput2) - 1),
+ SyscallSucceedsWithValue(sizeof(kInput2) - 1));
+
+ // Get the first line.
+ char line1[8] = {};
+ ExpectReadable(slave_, sizeof(kInput1) - 1, line1);
+ EXPECT_STREQ(line1, kInput1);
+
+ // Get the second line.
+ char line2[8] = {};
+ ExpectReadable(slave_, sizeof(kInput2) - 1, line2);
+ EXPECT_STREQ(line2, kInput2);
+
+ ExpectFinished(slave_);
+}
+
+TEST_F(PtyTest, SwitchNoncanonToCanonMultiline) {
+ DisableCanonical();
+
+ constexpr char kInput1[] = "GO\n";
+ constexpr char kInput2[] = "BLUE\n";
+ constexpr char kExpected[] = "GO\nBLUE\n";
+
+ // Write both lines.
+ ASSERT_THAT(WriteFd(master_.get(), kInput1, sizeof(kInput1) - 1),
+ SyscallSucceedsWithValue(sizeof(kInput1) - 1));
+ ASSERT_THAT(WriteFd(master_.get(), kInput2, sizeof(kInput2) - 1),
+ SyscallSucceedsWithValue(sizeof(kInput2) - 1));
+
+ ASSERT_NO_ERRNO(
+ WaitUntilReceived(slave_.get(), sizeof(kInput1) + sizeof(kInput2) - 2));
+ EnableCanonical();
+
+ // Get all together as one line.
+ char line[9] = {};
+ ExpectReadable(slave_, 8, line);
+ EXPECT_STREQ(line, kExpected);
+
+ ExpectFinished(slave_);
+}
+
+TEST_F(PtyTest, SwitchTwiceMultiline) {
+ std::string kInputs[] = {"GO\n", "BLUE\n", "!"};
+ std::string kExpected = "GO\nBLUE\n!";
+
+ // Write each line.
+ for (std::string input : kInputs) {
+ ASSERT_THAT(WriteFd(master_.get(), input.c_str(), input.size()),
+ SyscallSucceedsWithValue(input.size()));
+ }
+
+ DisableCanonical();
+ // All written characters have to make it into the input queue before
+ // canonical mode is re-enabled. If the final '!' character hasn't been
+ // enqueued before canonical mode is re-enabled, it won't be readable.
+ ASSERT_NO_ERRNO(WaitUntilReceived(slave_.get(), kExpected.size()));
+ EnableCanonical();
+
+ // Get all together as one line.
+ char line[10] = {};
+ ExpectReadable(slave_, 9, line);
+ EXPECT_STREQ(line, kExpected.c_str());
+
+ ExpectFinished(slave_);
+}
+
+TEST_F(PtyTest, QueueSize) {
+ // Write the line.
+ constexpr char kInput1[] = "GO\n";
+ ASSERT_THAT(WriteFd(master_.get(), kInput1, sizeof(kInput1) - 1),
+ SyscallSucceedsWithValue(sizeof(kInput1) - 1));
+ ASSERT_NO_ERRNO(WaitUntilReceived(slave_.get(), sizeof(kInput1) - 1));
+
+ // Ensure that writing more (beyond what is readable) does not impact the
+ // readable size.
+ char input[kMaxLineSize];
+ memset(input, 'M', kMaxLineSize);
+ ASSERT_THAT(WriteFd(master_.get(), input, kMaxLineSize),
+ SyscallSucceedsWithValue(kMaxLineSize));
+ int inputBufSize = ASSERT_NO_ERRNO_AND_VALUE(
+ WaitUntilReceived(slave_.get(), sizeof(kInput1) - 1));
+ EXPECT_EQ(inputBufSize, sizeof(kInput1) - 1);
+}
+
+TEST_F(PtyTest, PartialBadBuffer) {
+ // Allocate 2 pages.
+ void* addr = mmap(nullptr, 2 * kPageSize, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(addr, MAP_FAILED);
+ char* buf = reinterpret_cast<char*>(addr);
+
+ // Guard the 2nd page for our read to run into.
+ ASSERT_THAT(
+ mprotect(reinterpret_cast<void*>(buf + kPageSize), kPageSize, PROT_NONE),
+ SyscallSucceeds());
+
+ // Leave only one free byte in the buffer.
+ char* bad_buffer = buf + kPageSize - 1;
+
+ // Write to the master.
+ constexpr char kBuf[] = "hello\n";
+ constexpr size_t size = sizeof(kBuf) - 1;
+ EXPECT_THAT(WriteFd(master_.get(), kBuf, size),
+ SyscallSucceedsWithValue(size));
+
+ // Read from the slave into bad_buffer.
+ ASSERT_NO_ERRNO(WaitUntilReceived(slave_.get(), size));
+ EXPECT_THAT(ReadFd(slave_.get(), bad_buffer, size),
+ SyscallFailsWithErrno(EFAULT));
+
+ EXPECT_THAT(munmap(addr, 2 * kPageSize), SyscallSucceeds()) << addr;
+}
+
+TEST_F(PtyTest, SimpleEcho) {
+ constexpr char kInput[] = "Mr. Eko";
+ EXPECT_THAT(WriteFd(master_.get(), kInput, strlen(kInput)),
+ SyscallSucceedsWithValue(strlen(kInput)));
+
+ char buf[100] = {};
+ ExpectReadable(master_, strlen(kInput), buf);
+
+ EXPECT_STREQ(buf, kInput);
+ ExpectFinished(master_);
+}
+
+TEST_F(PtyTest, GetWindowSize) {
+ struct winsize ws;
+ ASSERT_THAT(ioctl(slave_.get(), TIOCGWINSZ, &ws), SyscallSucceeds());
+ EXPECT_EQ(ws.ws_row, 0);
+ EXPECT_EQ(ws.ws_col, 0);
+}
+
+TEST_F(PtyTest, SetSlaveWindowSize) {
+ constexpr uint16_t kRows = 343;
+ constexpr uint16_t kCols = 2401;
+ struct winsize ws = {.ws_row = kRows, .ws_col = kCols};
+ ASSERT_THAT(ioctl(slave_.get(), TIOCSWINSZ, &ws), SyscallSucceeds());
+
+ struct winsize retrieved_ws = {};
+ ASSERT_THAT(ioctl(master_.get(), TIOCGWINSZ, &retrieved_ws),
+ SyscallSucceeds());
+ EXPECT_EQ(retrieved_ws.ws_row, kRows);
+ EXPECT_EQ(retrieved_ws.ws_col, kCols);
+}
+
+TEST_F(PtyTest, SetMasterWindowSize) {
+ constexpr uint16_t kRows = 343;
+ constexpr uint16_t kCols = 2401;
+ struct winsize ws = {.ws_row = kRows, .ws_col = kCols};
+ ASSERT_THAT(ioctl(master_.get(), TIOCSWINSZ, &ws), SyscallSucceeds());
+
+ struct winsize retrieved_ws = {};
+ ASSERT_THAT(ioctl(slave_.get(), TIOCGWINSZ, &retrieved_ws),
+ SyscallSucceeds());
+ EXPECT_EQ(retrieved_ws.ws_row, kRows);
+ EXPECT_EQ(retrieved_ws.ws_col, kCols);
+}
+
+} // namespace
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/pwrite64.cc b/test/syscalls/linux/pwrite64.cc
new file mode 100644
index 000000000..60ae6de1f
--- /dev/null
+++ b/test/syscalls/linux/pwrite64.cc
@@ -0,0 +1,79 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// This test is currently very rudimentary.
+//
+// TODO:
+// * bad buffer states (EFAULT).
+// * bad fds (wrong permission, wrong type of file, EBADF).
+// * check offset is not incremented.
+// * check for EOF.
+// * writing to pipes, symlinks, special files.
+class Pwrite64 : public ::testing::Test {
+ void SetUp() override {
+ name_ = NewTempAbsPath();
+ int fd;
+ ASSERT_THAT(fd = open(name_.c_str(), O_CREAT, 0644), SyscallSucceeds());
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+ }
+
+ void TearDown() override { unlink(name_.c_str()); }
+
+ public:
+ std::string name_;
+};
+
+TEST_F(Pwrite64, AppendOnly) {
+ int fd;
+ ASSERT_THAT(fd = open(name_.c_str(), O_APPEND | O_RDWR), SyscallSucceeds());
+ constexpr int64_t kBufSize = 1024;
+ std::vector<char> buf(kBufSize);
+ std::fill(buf.begin(), buf.end(), 'a');
+ EXPECT_THAT(PwriteFd(fd, buf.data(), buf.size(), 0),
+ SyscallSucceedsWithValue(buf.size()));
+ EXPECT_THAT(lseek(fd, 0, SEEK_CUR), SyscallSucceedsWithValue(0));
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+}
+
+TEST_F(Pwrite64, InvalidArgs) {
+ int fd;
+ ASSERT_THAT(fd = open(name_.c_str(), O_APPEND | O_RDWR), SyscallSucceeds());
+ constexpr int64_t kBufSize = 1024;
+ std::vector<char> buf(kBufSize);
+ std::fill(buf.begin(), buf.end(), 'a');
+ EXPECT_THAT(PwriteFd(fd, buf.data(), buf.size(), -1),
+ SyscallFailsWithErrno(EINVAL));
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/read.cc b/test/syscalls/linux/read.cc
new file mode 100644
index 000000000..eb1b5bc10
--- /dev/null
+++ b/test/syscalls/linux/read.cc
@@ -0,0 +1,117 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+class ReadTest : public ::testing::Test {
+ void SetUp() override {
+ name_ = NewTempAbsPath();
+ int fd;
+ ASSERT_THAT(fd = open(name_.c_str(), O_CREAT, 0644), SyscallSucceeds());
+ ASSERT_THAT(close(fd), SyscallSucceeds());
+ }
+
+ void TearDown() override { unlink(name_.c_str()); }
+
+ public:
+ std::string name_;
+};
+
+TEST_F(ReadTest, ZeroBuffer) {
+ int fd;
+ ASSERT_THAT(fd = open(name_.c_str(), O_RDWR), SyscallSucceeds());
+
+ char msg[] = "hello world";
+ EXPECT_THAT(PwriteFd(fd, msg, strlen(msg), 0),
+ SyscallSucceedsWithValue(strlen(msg)));
+
+ char buf[10];
+ EXPECT_THAT(ReadFd(fd, buf, 0), SyscallSucceedsWithValue(0));
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+}
+
+TEST_F(ReadTest, EmptyFileReturnsZeroAtEOF) {
+ int fd;
+ ASSERT_THAT(fd = open(name_.c_str(), O_RDWR), SyscallSucceeds());
+
+ char eof_buf[10];
+ EXPECT_THAT(ReadFd(fd, eof_buf, 10), SyscallSucceedsWithValue(0));
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+}
+
+TEST_F(ReadTest, EofAfterRead) {
+ int fd;
+ ASSERT_THAT(fd = open(name_.c_str(), O_RDWR), SyscallSucceeds());
+
+ // Write some bytes to be read.
+ constexpr char kMessage[] = "hello world";
+ EXPECT_THAT(PwriteFd(fd, kMessage, sizeof(kMessage), 0),
+ SyscallSucceedsWithValue(sizeof(kMessage)));
+
+ // Read all of the bytes at once.
+ char buf[sizeof(kMessage)];
+ EXPECT_THAT(ReadFd(fd, buf, sizeof(kMessage)),
+ SyscallSucceedsWithValue(sizeof(kMessage)));
+
+ // Read again with a non-zero buffer and expect EOF.
+ char eof_buf[10];
+ EXPECT_THAT(ReadFd(fd, eof_buf, 10), SyscallSucceedsWithValue(0));
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+}
+
+TEST_F(ReadTest, DevNullReturnsEof) {
+ int fd;
+ ASSERT_THAT(fd = open("/dev/null", O_RDONLY), SyscallSucceeds());
+ std::vector<char> buf(1);
+ EXPECT_THAT(ReadFd(fd, buf.data(), 1), SyscallSucceedsWithValue(0));
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+}
+
+const int kReadSize = 128 * 1024;
+
+// Do not allow random save as it could lead to partial reads.
+TEST_F(ReadTest, CanReadFullyFromDevZero_NoRandomSave) {
+ int fd;
+ ASSERT_THAT(fd = open("/dev/zero", O_RDONLY), SyscallSucceeds());
+
+ std::vector<char> buf(kReadSize, 1);
+ EXPECT_THAT(ReadFd(fd, buf.data(), kReadSize),
+ SyscallSucceedsWithValue(kReadSize));
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+ EXPECT_EQ(std::vector<char>(kReadSize, 0), buf);
+}
+
+TEST_F(ReadTest, ReadDirectoryFails) {
+ const FileDescriptor file =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(GetAbsoluteTestTmpdir(), O_RDONLY));
+ std::vector<char> buf(1);
+ EXPECT_THAT(ReadFd(file.get(), buf.data(), 1), SyscallFailsWithErrno(EISDIR));
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/readv.cc b/test/syscalls/linux/readv.cc
new file mode 100644
index 000000000..0b933673a
--- /dev/null
+++ b/test/syscalls/linux/readv.cc
@@ -0,0 +1,293 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/file_base.h"
+#include "test/syscalls/linux/readv_common.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+#include "test/util/timer_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+class ReadvTest : public FileTest {
+ void SetUp() override {
+ FileTest::SetUp();
+
+ ASSERT_THAT(write(test_file_fd_.get(), kReadvTestData, kReadvTestDataSize),
+ SyscallSucceedsWithValue(kReadvTestDataSize));
+ ASSERT_THAT(lseek(test_file_fd_.get(), 0, SEEK_SET),
+ SyscallSucceedsWithValue(0));
+ ASSERT_THAT(write(test_pipe_[1], kReadvTestData, kReadvTestDataSize),
+ SyscallSucceedsWithValue(kReadvTestDataSize));
+ }
+};
+
+TEST_F(ReadvTest, ReadOneBufferPerByte_File) {
+ ReadOneBufferPerByte(test_file_fd_.get());
+}
+
+TEST_F(ReadvTest, ReadOneBufferPerByte_Pipe) {
+ ReadOneBufferPerByte(test_pipe_[0]);
+}
+
+TEST_F(ReadvTest, ReadOneHalfAtATime_File) {
+ ReadOneHalfAtATime(test_file_fd_.get());
+}
+
+TEST_F(ReadvTest, ReadOneHalfAtATime_Pipe) {
+ ReadOneHalfAtATime(test_pipe_[0]);
+}
+
+TEST_F(ReadvTest, ReadAllOneBuffer_File) {
+ ReadAllOneBuffer(test_file_fd_.get());
+}
+
+TEST_F(ReadvTest, ReadAllOneBuffer_Pipe) { ReadAllOneBuffer(test_pipe_[0]); }
+
+TEST_F(ReadvTest, ReadAllOneLargeBuffer_File) {
+ ReadAllOneLargeBuffer(test_file_fd_.get());
+}
+
+TEST_F(ReadvTest, ReadAllOneLargeBuffer_Pipe) {
+ ReadAllOneLargeBuffer(test_pipe_[0]);
+}
+
+TEST_F(ReadvTest, ReadBuffersOverlapping_File) {
+ ReadBuffersOverlapping(test_file_fd_.get());
+}
+
+TEST_F(ReadvTest, ReadBuffersOverlapping_Pipe) {
+ ReadBuffersOverlapping(test_pipe_[0]);
+}
+
+TEST_F(ReadvTest, ReadBuffersDiscontinuous_File) {
+ ReadBuffersDiscontinuous(test_file_fd_.get());
+}
+
+TEST_F(ReadvTest, ReadBuffersDiscontinuous_Pipe) {
+ ReadBuffersDiscontinuous(test_pipe_[0]);
+}
+
+TEST_F(ReadvTest, ReadIovecsCompletelyFilled_File) {
+ ReadIovecsCompletelyFilled(test_file_fd_.get());
+}
+
+TEST_F(ReadvTest, ReadIovecsCompletelyFilled_Pipe) {
+ ReadIovecsCompletelyFilled(test_pipe_[0]);
+}
+
+TEST_F(ReadvTest, BadFileDescriptor) {
+ char buffer[1024];
+ struct iovec iov[1];
+ iov[0].iov_base = buffer;
+ iov[0].iov_len = 1024;
+
+ ASSERT_THAT(readv(-1, iov, 1024), SyscallFailsWithErrno(EBADF));
+}
+
+TEST_F(ReadvTest, BadIovecsPointer_File) {
+ ASSERT_THAT(readv(test_file_fd_.get(), nullptr, 1),
+ SyscallFailsWithErrno(EFAULT));
+}
+
+TEST_F(ReadvTest, BadIovecsPointer_Pipe) {
+ ASSERT_THAT(readv(test_pipe_[0], nullptr, 1), SyscallFailsWithErrno(EFAULT));
+}
+
+TEST_F(ReadvTest, BadIovecBase_File) {
+ struct iovec iov[1];
+ iov[0].iov_base = nullptr;
+ iov[0].iov_len = 1024;
+ ASSERT_THAT(readv(test_file_fd_.get(), iov, 1),
+ SyscallFailsWithErrno(EFAULT));
+}
+
+TEST_F(ReadvTest, BadIovecBase_Pipe) {
+ struct iovec iov[1];
+ iov[0].iov_base = nullptr;
+ iov[0].iov_len = 1024;
+ ASSERT_THAT(readv(test_pipe_[0], iov, 1), SyscallFailsWithErrno(EFAULT));
+}
+
+TEST_F(ReadvTest, ZeroIovecs_File) {
+ struct iovec iov[1];
+ iov[0].iov_base = 0;
+ iov[0].iov_len = 0;
+ ASSERT_THAT(readv(test_file_fd_.get(), iov, 1), SyscallSucceeds());
+}
+
+TEST_F(ReadvTest, ZeroIovecs_Pipe) {
+ struct iovec iov[1];
+ iov[0].iov_base = 0;
+ iov[0].iov_len = 0;
+ ASSERT_THAT(readv(test_pipe_[0], iov, 1), SyscallSucceeds());
+}
+
+TEST_F(ReadvTest, NotReadable_File) {
+ char buffer[1024];
+ struct iovec iov[1];
+ iov[0].iov_base = buffer;
+ iov[0].iov_len = 1024;
+
+ std::string wronly_file = NewTempAbsPath();
+ FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(
+ Open(wronly_file, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR));
+ ASSERT_THAT(readv(fd.get(), iov, 1), SyscallFailsWithErrno(EBADF));
+ fd.reset(); // Close before unlinking.
+ ASSERT_THAT(unlink(wronly_file.c_str()), SyscallSucceeds());
+}
+
+TEST_F(ReadvTest, NotReadable_Pipe) {
+ char buffer[1024];
+ struct iovec iov[1];
+ iov[0].iov_base = buffer;
+ iov[0].iov_len = 1024;
+ ASSERT_THAT(readv(test_pipe_[1], iov, 1), SyscallFailsWithErrno(EBADF));
+}
+
+TEST_F(ReadvTest, DirNotReadable) {
+ char buffer[1024];
+ struct iovec iov[1];
+ iov[0].iov_base = buffer;
+ iov[0].iov_len = 1024;
+
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(GetAbsoluteTestTmpdir(), O_RDONLY));
+ ASSERT_THAT(readv(fd.get(), iov, 1), SyscallFailsWithErrno(EISDIR));
+}
+
+TEST_F(ReadvTest, OffsetIncremented) {
+ char* buffer = reinterpret_cast<char*>(malloc(kReadvTestDataSize));
+ struct iovec iov[1];
+ iov[0].iov_base = buffer;
+ iov[0].iov_len = kReadvTestDataSize;
+
+ ASSERT_THAT(readv(test_file_fd_.get(), iov, 1),
+ SyscallSucceedsWithValue(kReadvTestDataSize));
+ ASSERT_THAT(lseek(test_file_fd_.get(), 0, SEEK_CUR),
+ SyscallSucceedsWithValue(kReadvTestDataSize));
+
+ free(buffer);
+}
+
+TEST_F(ReadvTest, EndOfFile) {
+ char* buffer = reinterpret_cast<char*>(malloc(kReadvTestDataSize));
+ struct iovec iov[1];
+ iov[0].iov_base = buffer;
+ iov[0].iov_len = kReadvTestDataSize;
+ ASSERT_THAT(readv(test_file_fd_.get(), iov, 1),
+ SyscallSucceedsWithValue(kReadvTestDataSize));
+ free(buffer);
+
+ buffer = reinterpret_cast<char*>(malloc(kReadvTestDataSize));
+ iov[0].iov_base = buffer;
+ iov[0].iov_len = kReadvTestDataSize;
+ ASSERT_THAT(readv(test_file_fd_.get(), iov, 1), SyscallSucceedsWithValue(0));
+ free(buffer);
+}
+
+TEST_F(ReadvTest, WouldBlock_Pipe) {
+ struct iovec iov[1];
+ iov[0].iov_base = reinterpret_cast<char*>(malloc(kReadvTestDataSize));
+ iov[0].iov_len = kReadvTestDataSize;
+ ASSERT_THAT(readv(test_pipe_[0], iov, 1),
+ SyscallSucceedsWithValue(kReadvTestDataSize));
+ free(iov[0].iov_base);
+
+ iov[0].iov_base = reinterpret_cast<char*>(malloc(kReadvTestDataSize));
+ ASSERT_THAT(readv(test_pipe_[0], iov, 1), SyscallFailsWithErrno(EAGAIN));
+ free(iov[0].iov_base);
+}
+
+TEST_F(ReadvTest, ZeroBuffer) {
+ char buf[10];
+ struct iovec iov[1];
+ iov[0].iov_base = buf;
+ iov[0].iov_len = 0;
+ ASSERT_THAT(readv(test_pipe_[0], iov, 1), SyscallSucceedsWithValue(0));
+}
+
+TEST_F(ReadvTest, NullIovecInNonemptyArray) {
+ std::vector<char> buf(kReadvTestDataSize);
+ struct iovec iov[2];
+ iov[0].iov_base = nullptr;
+ iov[0].iov_len = 0;
+ iov[1].iov_base = buf.data();
+ iov[1].iov_len = kReadvTestDataSize;
+ ASSERT_THAT(readv(test_file_fd_.get(), iov, 2),
+ SyscallSucceedsWithValue(kReadvTestDataSize));
+}
+
+TEST_F(ReadvTest, IovecOutsideTaskAddressRangeInNonemptyArray) {
+ std::vector<char> buf(kReadvTestDataSize);
+ struct iovec iov[2];
+ iov[0].iov_base = reinterpret_cast<void*>(~static_cast<uintptr_t>(0));
+ iov[0].iov_len = 0;
+ iov[1].iov_base = buf.data();
+ iov[1].iov_len = kReadvTestDataSize;
+ ASSERT_THAT(readv(test_file_fd_.get(), iov, 2),
+ SyscallFailsWithErrno(EFAULT));
+}
+
+// This test depends on the maximum extent of a single readv() syscall, so
+// we can't tolerate interruption from saving.
+TEST(ReadvTestNoFixture, TruncatedAtMax_NoRandomSave) {
+ // Ensure that we won't be interrupted by ITIMER_PROF.
+ struct itimerval itv = {};
+ auto const cleanup_itimer =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedItimer(ITIMER_PROF, itv));
+
+ // From Linux's include/linux/fs.h.
+ size_t const MAX_RW_COUNT = INT_MAX & ~(kPageSize - 1);
+
+ // Create an iovec array with 3 segments pointing to consecutive parts of a
+ // buffer. The first covers all but the last three pages, and should be
+ // written to in its entirety. The second covers the last page before
+ // MAX_RW_COUNT and the first page after; only the first page should be
+ // written to. The third covers the last page of the buffer, and should be
+ // skipped entirely.
+ size_t const kBufferSize = MAX_RW_COUNT + 2 * kPageSize;
+ size_t const kFirstOffset = MAX_RW_COUNT - kPageSize;
+ size_t const kSecondOffset = MAX_RW_COUNT + kPageSize;
+ // The buffer is too big to fit on the stack.
+ std::vector<char> buf(kBufferSize);
+ struct iovec iov[3];
+ iov[0].iov_base = buf.data();
+ iov[0].iov_len = kFirstOffset;
+ iov[1].iov_base = buf.data() + kFirstOffset;
+ iov[1].iov_len = kSecondOffset - kFirstOffset;
+ iov[2].iov_base = buf.data() + kSecondOffset;
+ iov[2].iov_len = kBufferSize - kSecondOffset;
+
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_RDONLY));
+ EXPECT_THAT(readv(fd.get(), iov, 3), SyscallSucceedsWithValue(MAX_RW_COUNT));
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/readv_common.cc b/test/syscalls/linux/readv_common.cc
new file mode 100644
index 000000000..349b80d7f
--- /dev/null
+++ b/test/syscalls/linux/readv_common.cc
@@ -0,0 +1,180 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/file_base.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+extern const char kReadvTestData[] =
+ "127.0.0.1 localhost"
+ ""
+ "# The following lines are desirable for IPv6 capable hosts"
+ "::1 ip6-localhost ip6-loopback"
+ "fe00::0 ip6-localnet"
+ "ff00::0 ip6-mcastprefix"
+ "ff02::1 ip6-allnodes"
+ "ff02::2 ip6-allrouters"
+ "ff02::3 ip6-allhosts"
+ "192.168.1.100 a"
+ "93.184.216.34 foo.bar.example.com xcpu";
+extern const size_t kReadvTestDataSize = sizeof(kReadvTestData);
+
+static void ReadAllOneProvidedBuffer(int fd, std::vector<char>* buffer) {
+ struct iovec iovs[1];
+ iovs[0].iov_base = buffer->data();
+ iovs[0].iov_len = kReadvTestDataSize;
+
+ ASSERT_THAT(readv(fd, iovs, 1), SyscallSucceedsWithValue(kReadvTestDataSize));
+
+ std::pair<struct iovec*, int> iovec_desc(iovs, 1);
+ EXPECT_THAT(iovec_desc, MatchesStringLength(kReadvTestDataSize));
+ EXPECT_THAT(iovec_desc, MatchesStringValue(kReadvTestData));
+}
+
+void ReadAllOneBuffer(int fd) {
+ std::vector<char> buffer(kReadvTestDataSize);
+ ReadAllOneProvidedBuffer(fd, &buffer);
+}
+
+void ReadAllOneLargeBuffer(int fd) {
+ std::vector<char> buffer(10 * kReadvTestDataSize);
+ ReadAllOneProvidedBuffer(fd, &buffer);
+}
+
+void ReadOneHalfAtATime(int fd) {
+ int len0 = kReadvTestDataSize / 2;
+ int len1 = kReadvTestDataSize - len0;
+ std::vector<char> buffer0(len0);
+ std::vector<char> buffer1(len1);
+
+ struct iovec iovs[2];
+ iovs[0].iov_base = buffer0.data();
+ iovs[0].iov_len = len0;
+ iovs[1].iov_base = buffer1.data();
+ iovs[1].iov_len = len1;
+
+ ASSERT_THAT(readv(fd, iovs, 2), SyscallSucceedsWithValue(kReadvTestDataSize));
+
+ std::pair<struct iovec*, int> iovec_desc(iovs, 2);
+ EXPECT_THAT(iovec_desc, MatchesStringLength(kReadvTestDataSize));
+ EXPECT_THAT(iovec_desc, MatchesStringValue(kReadvTestData));
+}
+
+void ReadOneBufferPerByte(int fd) {
+ std::vector<char> buffer(kReadvTestDataSize);
+ std::vector<struct iovec> iovs(kReadvTestDataSize);
+ char* buffer_ptr = buffer.data();
+ struct iovec* iovs_ptr = iovs.data();
+
+ for (int i = 0; i < static_cast<int>(kReadvTestDataSize); i++) {
+ struct iovec iov = {
+ .iov_base = &buffer_ptr[i],
+ .iov_len = 1,
+ };
+ iovs_ptr[i] = iov;
+ }
+
+ ASSERT_THAT(readv(fd, iovs_ptr, kReadvTestDataSize),
+ SyscallSucceedsWithValue(kReadvTestDataSize));
+
+ std::pair<struct iovec*, int> iovec_desc(iovs.data(), kReadvTestDataSize);
+ EXPECT_THAT(iovec_desc, MatchesStringLength(kReadvTestDataSize));
+ EXPECT_THAT(iovec_desc, MatchesStringValue(kReadvTestData));
+}
+
+void ReadBuffersOverlapping(int fd) {
+ // overlap the first overlap_bytes.
+ int overlap_bytes = 8;
+ std::vector<char> buffer(kReadvTestDataSize);
+
+ // overlapping causes us to get more data.
+ int expected_size = kReadvTestDataSize + overlap_bytes;
+ std::vector<char> expected(expected_size);
+ char* expected_ptr = expected.data();
+ memcpy(expected_ptr, &kReadvTestData[overlap_bytes], overlap_bytes);
+ memcpy(&expected_ptr[overlap_bytes], &kReadvTestData[overlap_bytes],
+ kReadvTestDataSize);
+
+ struct iovec iovs[2];
+ iovs[0].iov_base = buffer.data();
+ iovs[0].iov_len = overlap_bytes;
+ iovs[1].iov_base = buffer.data();
+ iovs[1].iov_len = kReadvTestDataSize;
+
+ ASSERT_THAT(readv(fd, iovs, 2), SyscallSucceedsWithValue(kReadvTestDataSize));
+
+ std::pair<struct iovec*, int> iovec_desc(iovs, 2);
+ EXPECT_THAT(iovec_desc, MatchesStringLength(expected_size));
+ EXPECT_THAT(iovec_desc, MatchesStringValue(expected_ptr));
+}
+
+void ReadBuffersDiscontinuous(int fd) {
+ // Each iov is 1 byte separated by 1 byte.
+ std::vector<char> buffer(kReadvTestDataSize * 2);
+ std::vector<struct iovec> iovs(kReadvTestDataSize);
+
+ char* buffer_ptr = buffer.data();
+ struct iovec* iovs_ptr = iovs.data();
+
+ for (int i = 0; i < static_cast<int>(kReadvTestDataSize); i++) {
+ struct iovec iov = {
+ .iov_base = &buffer_ptr[i * 2],
+ .iov_len = 1,
+ };
+ iovs_ptr[i] = iov;
+ }
+
+ ASSERT_THAT(readv(fd, iovs_ptr, kReadvTestDataSize),
+ SyscallSucceedsWithValue(kReadvTestDataSize));
+
+ std::pair<struct iovec*, int> iovec_desc(iovs.data(), kReadvTestDataSize);
+ EXPECT_THAT(iovec_desc, MatchesStringLength(kReadvTestDataSize));
+ EXPECT_THAT(iovec_desc, MatchesStringValue(kReadvTestData));
+}
+
+void ReadIovecsCompletelyFilled(int fd) {
+ int half = kReadvTestDataSize / 2;
+ std::vector<char> buffer(kReadvTestDataSize);
+ char* buffer_ptr = buffer.data();
+ memset(buffer.data(), '\0', kReadvTestDataSize);
+
+ struct iovec iovs[2];
+ iovs[0].iov_base = buffer.data();
+ iovs[0].iov_len = half;
+ iovs[1].iov_base = &buffer_ptr[half];
+ iovs[1].iov_len = half;
+
+ ASSERT_THAT(readv(fd, iovs, 2), SyscallSucceedsWithValue(half * 2));
+
+ std::pair<struct iovec*, int> iovec_desc(iovs, 2);
+ EXPECT_THAT(iovec_desc, MatchesStringLength(half * 2));
+ EXPECT_THAT(iovec_desc, MatchesStringValue(kReadvTestData));
+
+ char* str = static_cast<char*>(iovs[0].iov_base);
+ str[iovs[0].iov_len - 1] = '\0';
+ ASSERT_EQ(half - 1, strlen(str));
+}
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/readv_common.h b/test/syscalls/linux/readv_common.h
new file mode 100644
index 000000000..e261d545a
--- /dev/null
+++ b/test/syscalls/linux/readv_common.h
@@ -0,0 +1,61 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_READV_COMMON_H_
+#define GVISOR_TEST_SYSCALLS_READV_COMMON_H_
+
+#include <stddef.h>
+
+namespace gvisor {
+namespace testing {
+
+// A NUL-terminated std::string containing the data used by tests using the following
+// test helpers.
+extern const char kReadvTestData[];
+
+// The size of kReadvTestData, including the terminating NUL.
+extern const size_t kReadvTestDataSize;
+
+// ReadAllOneBuffer asserts that it can read kReadvTestData from an fd using
+// exactly one iovec.
+void ReadAllOneBuffer(int fd);
+
+// ReadAllOneLargeBuffer asserts that it can read kReadvTestData from an fd
+// using exactly one iovec containing an overly large buffer.
+void ReadAllOneLargeBuffer(int fd);
+
+// ReadOneHalfAtATime asserts that it can read test_data_from an fd using
+// exactly two iovecs that are roughly equivalent in size.
+void ReadOneHalfAtATime(int fd);
+
+// ReadOneBufferPerByte asserts that it can read kReadvTestData from an fd
+// using one iovec per byte.
+void ReadOneBufferPerByte(int fd);
+
+// ReadBuffersOverlapping asserts that it can read kReadvTestData from an fd
+// where two iovecs are overlapping.
+void ReadBuffersOverlapping(int fd);
+
+// ReadBuffersDiscontinuous asserts that it can read kReadvTestData from an fd
+// where each iovec is discontinuous from the next by 1 byte.
+void ReadBuffersDiscontinuous(int fd);
+
+// ReadIovecsCompletelyFilled asserts that the previous iovec is completely
+// filled before moving onto the next.
+void ReadIovecsCompletelyFilled(int fd);
+
+} // namespace testing
+} // namespace gvisor
+
+#endif // GVISOR_TEST_SYSCALLS_READV_COMMON_H_
diff --git a/test/syscalls/linux/readv_socket.cc b/test/syscalls/linux/readv_socket.cc
new file mode 100644
index 000000000..2c129b7e8
--- /dev/null
+++ b/test/syscalls/linux/readv_socket.cc
@@ -0,0 +1,182 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/file_base.h"
+#include "test/syscalls/linux/readv_common.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+class ReadvSocketTest : public SocketTest {
+ void SetUp() override {
+ SocketTest::SetUp();
+ ASSERT_THAT(
+ write(test_unix_stream_socket_[1], kReadvTestData, kReadvTestDataSize),
+ SyscallSucceedsWithValue(kReadvTestDataSize));
+ ASSERT_THAT(
+ write(test_unix_dgram_socket_[1], kReadvTestData, kReadvTestDataSize),
+ SyscallSucceedsWithValue(kReadvTestDataSize));
+ ASSERT_THAT(write(test_unix_seqpacket_socket_[1], kReadvTestData,
+ kReadvTestDataSize),
+ SyscallSucceedsWithValue(kReadvTestDataSize));
+ // FIXME: Enable when possible.
+ // ASSERT_THAT(write(test_tcp_socket_[1], kReadvTestData,
+ // kReadvTestDataSize),
+ // SyscallSucceedsWithValue(kReadvTestDataSize));
+ }
+};
+
+TEST_F(ReadvSocketTest, ReadOneBufferPerByte_StreamSocket) {
+ ReadOneBufferPerByte(test_unix_stream_socket_[0]);
+}
+
+TEST_F(ReadvSocketTest, ReadOneBufferPerByte_DgramSocket) {
+ ReadOneBufferPerByte(test_unix_dgram_socket_[0]);
+}
+
+TEST_F(ReadvSocketTest, ReadOneBufferPerByte_SeqPacketSocket) {
+ ReadOneBufferPerByte(test_unix_seqpacket_socket_[0]);
+}
+
+TEST_F(ReadvSocketTest, ReadOneHalfAtATime_StreamSocket) {
+ ReadOneHalfAtATime(test_unix_stream_socket_[0]);
+}
+
+TEST_F(ReadvSocketTest, ReadOneHalfAtATime_DgramSocket) {
+ ReadOneHalfAtATime(test_unix_dgram_socket_[0]);
+}
+
+TEST_F(ReadvSocketTest, ReadAllOneBuffer_StreamSocket) {
+ ReadAllOneBuffer(test_unix_stream_socket_[0]);
+}
+
+TEST_F(ReadvSocketTest, ReadAllOneBuffer_DgramSocket) {
+ ReadAllOneBuffer(test_unix_dgram_socket_[0]);
+}
+
+TEST_F(ReadvSocketTest, ReadAllOneLargeBuffer_StreamSocket) {
+ ReadAllOneLargeBuffer(test_unix_stream_socket_[0]);
+}
+
+TEST_F(ReadvSocketTest, ReadAllOneLargeBuffer_DgramSocket) {
+ ReadAllOneLargeBuffer(test_unix_dgram_socket_[0]);
+}
+
+TEST_F(ReadvSocketTest, ReadBuffersOverlapping_StreamSocket) {
+ ReadBuffersOverlapping(test_unix_stream_socket_[0]);
+}
+
+TEST_F(ReadvSocketTest, ReadBuffersOverlapping_DgramSocket) {
+ ReadBuffersOverlapping(test_unix_dgram_socket_[0]);
+}
+
+TEST_F(ReadvSocketTest, ReadBuffersDiscontinuous_StreamSocket) {
+ ReadBuffersDiscontinuous(test_unix_stream_socket_[0]);
+}
+
+TEST_F(ReadvSocketTest, ReadBuffersDiscontinuous_DgramSocket) {
+ ReadBuffersDiscontinuous(test_unix_dgram_socket_[0]);
+}
+
+TEST_F(ReadvSocketTest, ReadIovecsCompletelyFilled_StreamSocket) {
+ ReadIovecsCompletelyFilled(test_unix_stream_socket_[0]);
+}
+
+TEST_F(ReadvSocketTest, ReadIovecsCompletelyFilled_DgramSocket) {
+ ReadIovecsCompletelyFilled(test_unix_dgram_socket_[0]);
+}
+
+TEST_F(ReadvSocketTest, BadIovecsPointer_StreamSocket) {
+ ASSERT_THAT(readv(test_unix_stream_socket_[0], nullptr, 1),
+ SyscallFailsWithErrno(EFAULT));
+}
+
+TEST_F(ReadvSocketTest, BadIovecsPointer_DgramSocket) {
+ ASSERT_THAT(readv(test_unix_dgram_socket_[0], nullptr, 1),
+ SyscallFailsWithErrno(EFAULT));
+}
+
+TEST_F(ReadvSocketTest, BadIovecBase_StreamSocket) {
+ struct iovec iov[1];
+ iov[0].iov_base = nullptr;
+ iov[0].iov_len = 1024;
+ ASSERT_THAT(readv(test_unix_stream_socket_[0], iov, 1),
+ SyscallFailsWithErrno(EFAULT));
+}
+
+TEST_F(ReadvSocketTest, BadIovecBase_DgramSocket) {
+ struct iovec iov[1];
+ iov[0].iov_base = nullptr;
+ iov[0].iov_len = 1024;
+ ASSERT_THAT(readv(test_unix_dgram_socket_[0], iov, 1),
+ SyscallFailsWithErrno(EFAULT));
+}
+
+TEST_F(ReadvSocketTest, ZeroIovecs_StreamSocket) {
+ struct iovec iov[1];
+ iov[0].iov_base = 0;
+ iov[0].iov_len = 0;
+ ASSERT_THAT(readv(test_unix_stream_socket_[0], iov, 1), SyscallSucceeds());
+}
+
+TEST_F(ReadvSocketTest, ZeroIovecs_DgramSocket) {
+ struct iovec iov[1];
+ iov[0].iov_base = 0;
+ iov[0].iov_len = 0;
+ ASSERT_THAT(readv(test_unix_dgram_socket_[0], iov, 1), SyscallSucceeds());
+}
+
+TEST_F(ReadvSocketTest, WouldBlock_StreamSocket) {
+ struct iovec iov[1];
+ iov[0].iov_base = reinterpret_cast<char*>(malloc(kReadvTestDataSize));
+ iov[0].iov_len = kReadvTestDataSize;
+ ASSERT_THAT(readv(test_unix_stream_socket_[0], iov, 1),
+ SyscallSucceedsWithValue(kReadvTestDataSize));
+ free(iov[0].iov_base);
+
+ iov[0].iov_base = reinterpret_cast<char*>(malloc(kReadvTestDataSize));
+ ASSERT_THAT(readv(test_unix_stream_socket_[0], iov, 1),
+ SyscallFailsWithErrno(EAGAIN));
+ free(iov[0].iov_base);
+}
+
+TEST_F(ReadvSocketTest, WouldBlock_DgramSocket) {
+ struct iovec iov[1];
+ iov[0].iov_base = reinterpret_cast<char*>(malloc(kReadvTestDataSize));
+ iov[0].iov_len = kReadvTestDataSize;
+ ASSERT_THAT(readv(test_unix_dgram_socket_[0], iov, 1),
+ SyscallSucceedsWithValue(kReadvTestDataSize));
+ free(iov[0].iov_base);
+
+ iov[0].iov_base = reinterpret_cast<char*>(malloc(kReadvTestDataSize));
+ ASSERT_THAT(readv(test_unix_dgram_socket_[0], iov, 1),
+ SyscallFailsWithErrno(EAGAIN));
+ free(iov[0].iov_base);
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/rename.cc b/test/syscalls/linux/rename.cc
new file mode 100644
index 000000000..f4c877a00
--- /dev/null
+++ b/test/syscalls/linux/rename.cc
@@ -0,0 +1,373 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <string>
+
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "absl/strings/string_view.h"
+#include "test/util/capability_util.h"
+#include "test/util/cleanup.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(RenameTest, RootToAnything) {
+ ASSERT_THAT(rename("/", "/bin"), SyscallFailsWithErrno(EBUSY));
+}
+
+TEST(RenameTest, AnythingToRoot) {
+ ASSERT_THAT(rename("/bin", "/"), SyscallFailsWithErrno(EBUSY));
+}
+
+TEST(RenameTest, SourceIsAncestorOfTarget) {
+ auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ auto subdir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir.path()));
+ ASSERT_THAT(rename(dir.path().c_str(), subdir.path().c_str()),
+ SyscallFailsWithErrno(EINVAL));
+
+ // Try an even deeper directory.
+ auto deep_subdir =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(subdir.path()));
+ ASSERT_THAT(rename(dir.path().c_str(), deep_subdir.path().c_str()),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(RenameTest, TargetIsAncestorOfSource) {
+ auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ auto subdir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir.path()));
+ ASSERT_THAT(rename(subdir.path().c_str(), dir.path().c_str()),
+ SyscallFailsWithErrno(ENOTEMPTY));
+
+ // Try an even deeper directory.
+ auto deep_subdir =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(subdir.path()));
+ ASSERT_THAT(rename(deep_subdir.path().c_str(), dir.path().c_str()),
+ SyscallFailsWithErrno(ENOTEMPTY));
+}
+
+TEST(RenameTest, FileToSelf) {
+ auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ EXPECT_THAT(rename(f.path().c_str(), f.path().c_str()), SyscallSucceeds());
+}
+
+TEST(RenameTest, DirectoryToSelf) {
+ auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ EXPECT_THAT(rename(f.path().c_str(), f.path().c_str()), SyscallSucceeds());
+}
+
+TEST(RenameTest, FileToSameDirectory) {
+ auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ std::string const newpath = NewTempAbsPath();
+ ASSERT_THAT(rename(f.path().c_str(), newpath.c_str()), SyscallSucceeds());
+ std::string const oldpath = f.release();
+ f.reset(newpath);
+ EXPECT_THAT(Exists(oldpath), IsPosixErrorOkAndHolds(false));
+ EXPECT_THAT(Exists(newpath), IsPosixErrorOkAndHolds(true));
+}
+
+TEST(RenameTest, DirectoryToSameDirectory) {
+ auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ std::string const newpath = NewTempAbsPath();
+ ASSERT_THAT(rename(dir.path().c_str(), newpath.c_str()), SyscallSucceeds());
+ std::string const oldpath = dir.release();
+ dir.reset(newpath);
+ EXPECT_THAT(Exists(oldpath), IsPosixErrorOkAndHolds(false));
+ EXPECT_THAT(Exists(newpath), IsPosixErrorOkAndHolds(true));
+}
+
+TEST(RenameTest, FileToParentDirectory) {
+ auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir1.path()));
+ auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir2.path()));
+ std::string const newpath = NewTempAbsPathInDir(dir1.path());
+ ASSERT_THAT(rename(f.path().c_str(), newpath.c_str()), SyscallSucceeds());
+ std::string const oldpath = f.release();
+ f.reset(newpath);
+ EXPECT_THAT(Exists(oldpath), IsPosixErrorOkAndHolds(false));
+ EXPECT_THAT(Exists(newpath), IsPosixErrorOkAndHolds(true));
+}
+
+TEST(RenameTest, DirectoryToParentDirectory) {
+ auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir1.path()));
+ auto dir3 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir2.path()));
+ EXPECT_THAT(IsDirectory(dir3.path()), IsPosixErrorOkAndHolds(true));
+ std::string const newpath = NewTempAbsPathInDir(dir1.path());
+ ASSERT_THAT(rename(dir3.path().c_str(), newpath.c_str()), SyscallSucceeds());
+ std::string const oldpath = dir3.release();
+ dir3.reset(newpath);
+ EXPECT_THAT(Exists(oldpath), IsPosixErrorOkAndHolds(false));
+ EXPECT_THAT(Exists(newpath), IsPosixErrorOkAndHolds(true));
+ EXPECT_THAT(IsDirectory(newpath), IsPosixErrorOkAndHolds(true));
+}
+
+TEST(RenameTest, FileToChildDirectory) {
+ auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir1.path()));
+ auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir1.path()));
+ std::string const newpath = NewTempAbsPathInDir(dir2.path());
+ ASSERT_THAT(rename(f.path().c_str(), newpath.c_str()), SyscallSucceeds());
+ std::string const oldpath = f.release();
+ f.reset(newpath);
+ EXPECT_THAT(Exists(oldpath), IsPosixErrorOkAndHolds(false));
+ EXPECT_THAT(Exists(newpath), IsPosixErrorOkAndHolds(true));
+}
+
+TEST(RenameTest, DirectoryToChildDirectory) {
+ auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir1.path()));
+ auto dir3 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir1.path()));
+ std::string const newpath = NewTempAbsPathInDir(dir2.path());
+ ASSERT_THAT(rename(dir3.path().c_str(), newpath.c_str()), SyscallSucceeds());
+ std::string const oldpath = dir3.release();
+ dir3.reset(newpath);
+ EXPECT_THAT(Exists(oldpath), IsPosixErrorOkAndHolds(false));
+ EXPECT_THAT(Exists(newpath), IsPosixErrorOkAndHolds(true));
+ EXPECT_THAT(IsDirectory(newpath), IsPosixErrorOkAndHolds(true));
+}
+
+TEST(RenameTest, DirectoryToOwnChildDirectory) {
+ auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir1.path()));
+ std::string const newpath = NewTempAbsPathInDir(dir2.path());
+ ASSERT_THAT(rename(dir1.path().c_str(), newpath.c_str()),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(RenameTest, FileOverwritesFile) {
+ auto f1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), "first", TempPath::kDefaultFileMode));
+ auto f2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), "second", TempPath::kDefaultFileMode));
+ ASSERT_THAT(rename(f1.path().c_str(), f2.path().c_str()), SyscallSucceeds());
+ EXPECT_THAT(Exists(f1.path()), IsPosixErrorOkAndHolds(false));
+
+ f1.release();
+ std::string f2_contents;
+ ASSERT_NO_ERRNO(GetContents(f2.path(), &f2_contents));
+ EXPECT_EQ("first", f2_contents);
+}
+
+TEST(RenameTest, FileDoesNotExist) {
+ auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const std::string source = JoinPath(dir.path(), "source");
+ const std::string dest = JoinPath(dir.path(), "dest");
+ ASSERT_THAT(rename(source.c_str(), dest.c_str()),
+ SyscallFailsWithErrno(ENOENT));
+}
+
+TEST(RenameTest, FileDoesNotOverwriteDirectory) {
+ auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ ASSERT_THAT(rename(f.path().c_str(), dir.path().c_str()),
+ SyscallFailsWithErrno(EISDIR));
+}
+
+TEST(RenameTest, DirectoryDoesNotOverwriteFile) {
+ auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ ASSERT_THAT(rename(dir.path().c_str(), f.path().c_str()),
+ SyscallFailsWithErrno(ENOTDIR));
+}
+
+TEST(RenameTest, DirectoryOverwritesEmptyDirectory) {
+ auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir1.path()));
+ auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ EXPECT_THAT(rename(dir1.path().c_str(), dir2.path().c_str()),
+ SyscallSucceeds());
+ EXPECT_THAT(Exists(dir1.path()), IsPosixErrorOkAndHolds(false));
+ dir1.release();
+ EXPECT_THAT(Exists(JoinPath(dir2.path(), Basename(f.path()))),
+ IsPosixErrorOkAndHolds(true));
+ f.release();
+}
+
+TEST(RenameTest, FailsWithDots) {
+ auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ auto dir1_dot = absl::StrCat(dir1.path(), "/.");
+ auto dir2_dot = absl::StrCat(dir2.path(), "/.");
+ auto dir1_dot_dot = absl::StrCat(dir1.path(), "/..");
+ auto dir2_dot_dot = absl::StrCat(dir2.path(), "/..");
+
+ // Try with dot paths in the first argument
+ EXPECT_THAT(rename(dir1_dot.c_str(), dir2.path().c_str()),
+ SyscallFailsWithErrno(EBUSY));
+ EXPECT_THAT(rename(dir1_dot_dot.c_str(), dir2.path().c_str()),
+ SyscallFailsWithErrno(EBUSY));
+
+ // Try with dot paths in the second argument
+ EXPECT_THAT(rename(dir1.path().c_str(), dir2_dot.c_str()),
+ SyscallFailsWithErrno(EBUSY));
+ EXPECT_THAT(rename(dir1.path().c_str(), dir2_dot_dot.c_str()),
+ SyscallFailsWithErrno(EBUSY));
+}
+
+TEST(RenameTest, DirectoryDoesNotOverwriteNonemptyDirectory) {
+ auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ auto f1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir1.path()));
+ auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ auto f2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir2.path()));
+ ASSERT_THAT(rename(dir1.path().c_str(), dir2.path().c_str()),
+ SyscallFailsWithErrno(ENOTEMPTY));
+}
+
+TEST(RenameTest, FailsWhenOldParentNotWritable) {
+ // Drop capabilities that allow us to override file and directory permissions.
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+ auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ auto f1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir1.path()));
+ auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ // dir1 is not writable.
+ ASSERT_THAT(chmod(dir1.path().c_str(), 0555), SyscallSucceeds());
+
+ std::string const newpath = NewTempAbsPathInDir(dir2.path());
+ EXPECT_THAT(rename(f1.path().c_str(), newpath.c_str()),
+ SyscallFailsWithErrno(EACCES));
+}
+
+TEST(RenameTest, FailsWhenNewParentNotWritable) {
+ // Drop capabilities that allow us to override file and directory permissions.
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+ auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ auto f1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir1.path()));
+ // dir2 is not writable.
+ auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(
+ TempPath::CreateDirWith(GetAbsoluteTestTmpdir(), 0555));
+
+ std::string const newpath = NewTempAbsPathInDir(dir2.path());
+ EXPECT_THAT(rename(f1.path().c_str(), newpath.c_str()),
+ SyscallFailsWithErrno(EACCES));
+}
+
+// Equivalent to FailsWhenNewParentNotWritable, but with a destination file
+// to overwrite.
+TEST(RenameTest, OverwriteFailsWhenNewParentNotWritable) {
+ // Drop capabilities that allow us to override file and directory permissions.
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+ auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ auto f1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir1.path()));
+
+ // dir2 is not writable.
+ auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ auto f2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir2.path()));
+ ASSERT_THAT(chmod(dir2.path().c_str(), 0555), SyscallSucceeds());
+
+ EXPECT_THAT(rename(f1.path().c_str(), f2.path().c_str()),
+ SyscallFailsWithErrno(EACCES));
+}
+
+// If the parent directory of source is not accessible, rename returns EACCES
+// because the user cannot determine if source exists.
+TEST(RenameTest, FileDoesNotExistWhenNewParentNotExecutable) {
+ // Drop capabilities that allow us to override file and directory permissions.
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+ // No execute permission.
+ auto dir = ASSERT_NO_ERRNO_AND_VALUE(
+ TempPath::CreateDirWith(GetAbsoluteTestTmpdir(), 0400));
+
+ const std::string source = JoinPath(dir.path(), "source");
+ const std::string dest = JoinPath(dir.path(), "dest");
+ ASSERT_THAT(rename(source.c_str(), dest.c_str()),
+ SyscallFailsWithErrno(EACCES));
+}
+
+TEST(RenameTest, DirectoryWithOpenFdOverwritesEmptyDirectory) {
+ auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir1.path()));
+ auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+ // Get an fd on dir1
+ int fd;
+ ASSERT_THAT(fd = open(dir1.path().c_str(), O_DIRECTORY), SyscallSucceeds());
+ auto close_f = Cleanup([fd] {
+ // Close the fd on f.
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+ });
+
+ EXPECT_THAT(rename(dir1.path().c_str(), dir2.path().c_str()),
+ SyscallSucceeds());
+
+ const std::string new_f_path = JoinPath(dir2.path(), Basename(f.path()));
+
+ auto remove_f = Cleanup([&] {
+ // Delete f in its new location.
+ ASSERT_NO_ERRNO(Delete(new_f_path));
+ f.release();
+ });
+
+ EXPECT_THAT(Exists(dir1.path()), IsPosixErrorOkAndHolds(false));
+ dir1.release();
+ EXPECT_THAT(Exists(new_f_path), IsPosixErrorOkAndHolds(true));
+}
+
+TEST(RenameTest, FileWithOpenFd) {
+ TempPath root_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ TempPath dir1 =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(root_dir.path()));
+ TempPath dir2 =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(root_dir.path()));
+ TempPath dir3 =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(root_dir.path()));
+
+ // Create file in dir1.
+ constexpr char kContents[] = "foo";
+ auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ dir1.path(), kContents, TempPath::kDefaultFileMode));
+
+ // Get fd on file.
+ const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDWR));
+
+ // Move f to dir2.
+ const std::string path2 = NewTempAbsPathInDir(dir2.path());
+ ASSERT_THAT(rename(f.path().c_str(), path2.c_str()), SyscallSucceeds());
+
+ // Read f's kContents.
+ char buf[sizeof(kContents)];
+ EXPECT_THAT(PreadFd(fd.get(), &buf, sizeof(kContents), 0),
+ SyscallSucceedsWithValue(sizeof(kContents) - 1));
+ EXPECT_EQ(absl::string_view(buf, sizeof(buf) - 1), kContents);
+
+ // Move f to dir3.
+ const std::string path3 = NewTempAbsPathInDir(dir3.path());
+ ASSERT_THAT(rename(path2.c_str(), path3.c_str()), SyscallSucceeds());
+
+ // Read f's kContents.
+ EXPECT_THAT(PreadFd(fd.get(), &buf, sizeof(kContents), 0),
+ SyscallSucceedsWithValue(sizeof(kContents) - 1));
+ EXPECT_EQ(absl::string_view(buf, sizeof(buf) - 1), kContents);
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/rlimits.cc b/test/syscalls/linux/rlimits.cc
new file mode 100644
index 000000000..0072285f9
--- /dev/null
+++ b/test/syscalls/linux/rlimits.cc
@@ -0,0 +1,61 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/resource.h>
+#include <sys/time.h>
+
+#include "test/util/capability_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(RlimitTest, SetRlimitHigher) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_RESOURCE)));
+ SKIP_IF(!IsRunningOnGvisor());
+
+ struct rlimit rl = {};
+ EXPECT_THAT(getrlimit(RLIMIT_NOFILE, &rl), SyscallSucceeds());
+
+ // Even with CAP_SYS_RESOURCE, gVisor does not allow setting a higher rlimit.
+ rl.rlim_max++;
+ EXPECT_THAT(setrlimit(RLIMIT_NOFILE, &rl), SyscallFailsWithErrno(EPERM));
+}
+
+TEST(RlimitTest, UnprivilegedSetRlimit) {
+ // Drop privileges if necessary.
+ if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_RESOURCE))) {
+ EXPECT_NO_ERRNO(SetCapability(CAP_SYS_RESOURCE, false));
+ }
+
+ struct rlimit rl = {};
+ rl.rlim_cur = 1000;
+ rl.rlim_max = 20000;
+ EXPECT_THAT(setrlimit(RLIMIT_NOFILE, &rl), SyscallSucceeds());
+
+ struct rlimit rl2 = {};
+ EXPECT_THAT(getrlimit(RLIMIT_NOFILE, &rl2), SyscallSucceeds());
+ EXPECT_EQ(rl.rlim_cur, rl2.rlim_cur);
+ EXPECT_EQ(rl.rlim_max, rl2.rlim_max);
+
+ rl.rlim_max = 100000;
+ EXPECT_THAT(setrlimit(RLIMIT_NOFILE, &rl), SyscallFailsWithErrno(EPERM));
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/rtsignal.cc b/test/syscalls/linux/rtsignal.cc
new file mode 100644
index 000000000..1f2fed7cc
--- /dev/null
+++ b/test/syscalls/linux/rtsignal.cc
@@ -0,0 +1,172 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <cerrno>
+#include <csignal>
+
+#include "gtest/gtest.h"
+#include "test/util/cleanup.h"
+#include "test/util/logging.h"
+#include "test/util/posix_error.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// saved_info is set by the handler.
+siginfo_t saved_info;
+
+// has_saved_info is set to true by the handler.
+volatile bool has_saved_info;
+
+void SigHandler(int sig, siginfo_t* info, void* context) {
+ // Copy to the given info.
+ saved_info = *info;
+ has_saved_info = true;
+}
+
+void ClearSavedInfo() {
+ // Clear the cached info.
+ memset(&saved_info, 0, sizeof(saved_info));
+ has_saved_info = false;
+}
+
+PosixErrorOr<Cleanup> SetupSignalHandler(int sig) {
+ struct sigaction sa;
+ sa.sa_sigaction = SigHandler;
+ sigfillset(&sa.sa_mask);
+ sa.sa_flags = SA_SIGINFO;
+ return ScopedSigaction(sig, sa);
+}
+
+class RtSignalTest : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ action_cleanup_ = ASSERT_NO_ERRNO_AND_VALUE(SetupSignalHandler(SIGUSR1));
+ mask_cleanup_ =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGUSR1));
+ }
+
+ void TearDown() override { ClearSavedInfo(); }
+
+ private:
+ Cleanup action_cleanup_;
+ Cleanup mask_cleanup_;
+};
+
+static int rt_sigqueueinfo(pid_t tgid, int sig, siginfo_t* uinfo) {
+ int ret;
+ do {
+ // NOTE: rt_sigqueueinfo(2) could return EAGAIN for RT signals.
+ ret = syscall(SYS_rt_sigqueueinfo, tgid, sig, uinfo);
+ } while (ret == -1 && errno == EAGAIN);
+ return ret;
+}
+
+TEST_F(RtSignalTest, InvalidTID) {
+ siginfo_t uinfo;
+ // Depending on the kernel version, these calls may fail with
+ // ESRCH (goobunutu machines) or EPERM (production machines). Thus,
+ // the test simply ensures that they do fail.
+ EXPECT_THAT(rt_sigqueueinfo(-1, SIGUSR1, &uinfo), SyscallFails());
+ EXPECT_FALSE(has_saved_info);
+ EXPECT_THAT(rt_sigqueueinfo(0, SIGUSR1, &uinfo), SyscallFails());
+ EXPECT_FALSE(has_saved_info);
+}
+
+TEST_F(RtSignalTest, InvalidCodes) {
+ siginfo_t uinfo;
+
+ // We need a child for the code checks to apply. If the process is delivering
+ // to itself, then it can use whatever codes it wants and they will go
+ // through.
+ pid_t child = fork();
+ if (child == 0) {
+ _exit(1);
+ }
+ ASSERT_THAT(child, SyscallSucceeds());
+
+ // These are not allowed for child processes.
+ uinfo.si_code = 0; // SI_USER.
+ EXPECT_THAT(rt_sigqueueinfo(child, SIGUSR1, &uinfo),
+ SyscallFailsWithErrno(EPERM));
+ uinfo.si_code = 0x80; // SI_KERNEL.
+ EXPECT_THAT(rt_sigqueueinfo(child, SIGUSR1, &uinfo),
+ SyscallFailsWithErrno(EPERM));
+ uinfo.si_code = -6; // SI_TKILL.
+ EXPECT_THAT(rt_sigqueueinfo(child, SIGUSR1, &uinfo),
+ SyscallFailsWithErrno(EPERM));
+ uinfo.si_code = -1; // SI_QUEUE (allowed).
+ EXPECT_THAT(rt_sigqueueinfo(child, SIGUSR1, &uinfo), SyscallSucceeds());
+
+ // Join the child process.
+ EXPECT_THAT(waitpid(child, nullptr, 0), SyscallSucceeds());
+}
+
+TEST_F(RtSignalTest, ValueDelivered) {
+ siginfo_t uinfo;
+ uinfo.si_code = -1; // SI_QUEUE (allowed).
+ uinfo.si_errno = 0x1234;
+
+ EXPECT_EQ(saved_info.si_errno, 0x0);
+ EXPECT_THAT(rt_sigqueueinfo(getpid(), SIGUSR1, &uinfo), SyscallSucceeds());
+ EXPECT_TRUE(has_saved_info);
+ EXPECT_EQ(saved_info.si_errno, 0x1234);
+}
+
+TEST_F(RtSignalTest, SignoMatch) {
+ auto action2_cleanup = ASSERT_NO_ERRNO_AND_VALUE(SetupSignalHandler(SIGUSR2));
+ auto mask2_cleanup =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGUSR2));
+
+ siginfo_t uinfo;
+ uinfo.si_code = -1; // SI_QUEUE (allowed).
+
+ EXPECT_THAT(rt_sigqueueinfo(getpid(), SIGUSR1, &uinfo), SyscallSucceeds());
+ EXPECT_TRUE(has_saved_info);
+ EXPECT_EQ(saved_info.si_signo, SIGUSR1);
+
+ ClearSavedInfo();
+
+ EXPECT_THAT(rt_sigqueueinfo(getpid(), SIGUSR2, &uinfo), SyscallSucceeds());
+ EXPECT_TRUE(has_saved_info);
+ EXPECT_EQ(saved_info.si_signo, SIGUSR2);
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
+
+int main(int argc, char** argv) {
+ // These tests depend on delivering SIGUSR1/2 to the main thread (so they can
+ // synchronously check has_saved_info). Block these so that any other threads
+ // created by TestInit will also have them blocked.
+ sigset_t set;
+ sigemptyset(&set);
+ sigaddset(&set, SIGUSR1);
+ sigaddset(&set, SIGUSR2);
+ TEST_PCHECK(sigprocmask(SIG_BLOCK, &set, nullptr) == 0);
+
+ gvisor::testing::TestInit(&argc, &argv);
+
+ return RUN_ALL_TESTS();
+}
diff --git a/test/syscalls/linux/sched.cc b/test/syscalls/linux/sched.cc
new file mode 100644
index 000000000..60cb6c443
--- /dev/null
+++ b/test/syscalls/linux/sched.cc
@@ -0,0 +1,71 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <sched.h>
+
+#include "gtest/gtest.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// In linux, pid is limited to 29 bits because how futex is implemented.
+constexpr int kImpossiblePID = (1 << 29) + 1;
+
+TEST(SchedGetparamTest, ReturnsZero) {
+ struct sched_param param;
+ EXPECT_THAT(sched_getparam(getpid(), &param), SyscallSucceeds());
+ EXPECT_EQ(param.sched_priority, 0);
+ EXPECT_THAT(sched_getparam(/*pid=*/0, &param), SyscallSucceeds());
+ EXPECT_EQ(param.sched_priority, 0);
+}
+
+TEST(SchedGetparamTest, InvalidPIDReturnsEINVAL) {
+ struct sched_param param;
+ EXPECT_THAT(sched_getparam(/*pid=*/-1, &param),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(SchedGetparamTest, ImpossiblePIDReturnsESRCH) {
+ struct sched_param param;
+ EXPECT_THAT(sched_getparam(kImpossiblePID, &param),
+ SyscallFailsWithErrno(ESRCH));
+}
+
+TEST(SchedGetparamTest, NullParamReturnsEINVAL) {
+ EXPECT_THAT(sched_getparam(0, nullptr), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(SchedGetschedulerTest, ReturnsSchedOther) {
+ EXPECT_THAT(sched_getscheduler(getpid()),
+ SyscallSucceedsWithValue(SCHED_OTHER));
+ EXPECT_THAT(sched_getscheduler(/*pid=*/0),
+ SyscallSucceedsWithValue(SCHED_OTHER));
+}
+
+TEST(SchedGetschedulerTest, ReturnsEINVAL) {
+ EXPECT_THAT(sched_getscheduler(/*pid=*/-1), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(SchedGetschedulerTest, ReturnsESRCH) {
+ EXPECT_THAT(sched_getscheduler(kImpossiblePID), SyscallFailsWithErrno(ESRCH));
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/sched_yield.cc b/test/syscalls/linux/sched_yield.cc
new file mode 100644
index 000000000..fc45aa5c2
--- /dev/null
+++ b/test/syscalls/linux/sched_yield.cc
@@ -0,0 +1,33 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sched.h>
+
+#include "gtest/gtest.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(SchedYieldTest, Success) {
+ EXPECT_THAT(sched_yield(), SyscallSucceeds());
+ EXPECT_THAT(sched_yield(), SyscallSucceeds());
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/seccomp.cc b/test/syscalls/linux/seccomp.cc
new file mode 100644
index 000000000..d6ac166a4
--- /dev/null
+++ b/test/syscalls/linux/seccomp.cc
@@ -0,0 +1,374 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <linux/audit.h>
+#include <linux/filter.h>
+#include <linux/seccomp.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+#include <time.h>
+#include <unistd.h>
+#include <atomic>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/base/macros.h"
+#include "test/util/logging.h"
+#include "test/util/memory_util.h"
+#include "test/util/multiprocess_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/proc_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+#ifndef SYS_SECCOMP
+#define SYS_SECCOMP 1
+#endif
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// A syscall not implemented by Linux that we don't expect to be called.
+constexpr uint32_t kFilteredSyscall = SYS_vserver;
+
+// Applies a seccomp-bpf filter that returns `filtered_result` for
+// `sysno` and allows all other syscalls. Async-signal-safe.
+void ApplySeccompFilter(uint32_t sysno, uint32_t filtered_result,
+ uint32_t flags = 0) {
+ // "Prior to [PR_SET_SECCOMP], the task must call prctl(PR_SET_NO_NEW_PRIVS,
+ // 1) or run with CAP_SYS_ADMIN privileges in its namespace." -
+ // Documentation/prctl/seccomp_filter.txt
+ //
+ // prctl(PR_SET_NO_NEW_PRIVS, 1) may be called repeatedly; calls after the
+ // first are no-ops.
+ TEST_PCHECK(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == 0);
+ MaybeSave();
+
+ struct sock_filter filter[] = {
+ // A = seccomp_data.arch
+ BPF_STMT(BPF_LD | BPF_ABS | BPF_W, 4),
+ // if (A != AUDIT_ARCH_X86_64) goto kill
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_X86_64, 0, 4),
+ // A = seccomp_data.nr
+ BPF_STMT(BPF_LD | BPF_ABS | BPF_W, 0),
+ // if (A != sysno) goto allow
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, sysno, 0, 1),
+ // return filtered_result
+ BPF_STMT(BPF_RET | BPF_K, filtered_result),
+ // allow: return SECCOMP_RET_ALLOW
+ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
+ // kill: return SECCOMP_RET_KILL
+ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL),
+ };
+ struct sock_fprog prog;
+ prog.len = ABSL_ARRAYSIZE(filter);
+ prog.filter = filter;
+ if (flags) {
+ TEST_CHECK(syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER, flags, &prog) ==
+ 0);
+ } else {
+ TEST_PCHECK(prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0) == 0);
+ }
+ MaybeSave();
+}
+
+// Wrapper for sigaction. Async-signal-safe.
+void RegisterSignalHandler(int signum,
+ void (*handler)(int, siginfo_t*, void*)) {
+ struct sigaction sa = {};
+ sa.sa_sigaction = handler;
+ sigemptyset(&sa.sa_mask);
+ sa.sa_flags = SA_SIGINFO;
+ TEST_PCHECK(sigaction(signum, &sa, nullptr) == 0);
+ MaybeSave();
+}
+
+// All of the following tests execute in a subprocess to ensure that each test
+// is run in a separate process. This avoids cross-contamination of seccomp
+// state between tests, and is necessary to ensure that test processes killed
+// by SECCOMP_RET_KILL are single-threaded (since SECCOMP_RET_KILL only kills
+// the offending thread, not the whole thread group).
+
+TEST(SeccompTest, RetKillCausesDeathBySIGSYS) {
+ pid_t const pid = fork();
+ if (pid == 0) {
+ // Register a signal handler for SIGSYS that we don't expect to be invoked.
+ RegisterSignalHandler(SIGSYS, +[](int, siginfo_t*, void*) { _exit(1); });
+ ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_KILL);
+ syscall(kFilteredSyscall);
+ TEST_CHECK_MSG(false, "Survived invocation of test syscall");
+ }
+ ASSERT_THAT(pid, SyscallSucceeds());
+ int status;
+ ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+ EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGSYS)
+ << "status " << status;
+}
+
+TEST(SeccompTest, RetKillOnlyKillsOneThread) {
+ Mapping stack = ASSERT_NO_ERRNO_AND_VALUE(
+ MmapAnon(2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
+
+ pid_t const pid = fork();
+ if (pid == 0) {
+ // Register a signal handler for SIGSYS that we don't expect to be invoked.
+ RegisterSignalHandler(SIGSYS, +[](int, siginfo_t*, void*) { _exit(1); });
+ ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_KILL);
+ // Pass CLONE_VFORK to block the original thread in the child process until
+ // the clone thread exits with SIGSYS.
+ //
+ // N.B. clone(2) is not officially async-signal-safe, but at minimum glibc's
+ // x86_64 implementation is safe. See glibc
+ // sysdeps/unix/sysv/linux/x86_64/clone.S.
+ clone(
+ +[](void* arg) {
+ syscall(kFilteredSyscall); // should kill the thread
+ _exit(1); // should be unreachable
+ return 2; // should be very unreachable, shut up the compiler
+ },
+ stack.endptr(),
+ CLONE_FILES | CLONE_FS | CLONE_SIGHAND | CLONE_THREAD | CLONE_VM |
+ CLONE_VFORK,
+ nullptr);
+ _exit(0);
+ }
+ ASSERT_THAT(pid, SyscallSucceeds());
+ int status;
+ ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << "status " << status;
+}
+
+TEST(SeccompTest, RetTrapCausesSIGSYS) {
+ pid_t const pid = fork();
+ if (pid == 0) {
+ constexpr uint16_t kTrapValue = 0xdead;
+ RegisterSignalHandler(SIGSYS, +[](int signo, siginfo_t* info, void*) {
+ // This is a signal handler, so we must stay async-signal-safe.
+ TEST_CHECK(info->si_signo == SIGSYS);
+ TEST_CHECK(info->si_code == SYS_SECCOMP);
+ TEST_CHECK(info->si_errno == kTrapValue);
+ TEST_CHECK(info->si_call_addr != nullptr);
+ TEST_CHECK(info->si_syscall == kFilteredSyscall);
+ TEST_CHECK(info->si_arch == AUDIT_ARCH_X86_64);
+ _exit(0);
+ });
+ ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_TRAP | kTrapValue);
+ syscall(kFilteredSyscall);
+ TEST_CHECK_MSG(false, "Survived invocation of test syscall");
+ }
+ ASSERT_THAT(pid, SyscallSucceeds());
+ int status;
+ ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << "status " << status;
+}
+
+constexpr uint64_t kVsyscallTimeEntry = 0xffffffffff600400;
+
+time_t vsyscall_time(time_t* t) {
+ return reinterpret_cast<time_t (*)(time_t*)>(kVsyscallTimeEntry)(t);
+}
+
+TEST(SeccompTest, SeccompAppliesToVsyscall) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsVsyscallEnabled()));
+
+ pid_t const pid = fork();
+ if (pid == 0) {
+ constexpr uint16_t kTrapValue = 0xdead;
+ RegisterSignalHandler(SIGSYS, +[](int signo, siginfo_t* info, void*) {
+ // This is a signal handler, so we must stay async-signal-safe.
+ TEST_CHECK(info->si_signo == SIGSYS);
+ TEST_CHECK(info->si_code == SYS_SECCOMP);
+ TEST_CHECK(info->si_errno == kTrapValue);
+ TEST_CHECK(info->si_call_addr != nullptr);
+ TEST_CHECK(info->si_syscall == SYS_time);
+ TEST_CHECK(info->si_arch == AUDIT_ARCH_X86_64);
+ _exit(0);
+ });
+ ApplySeccompFilter(SYS_time, SECCOMP_RET_TRAP | kTrapValue);
+ vsyscall_time(nullptr); // Should result in death.
+ TEST_CHECK_MSG(false, "Survived invocation of test syscall");
+ }
+ ASSERT_THAT(pid, SyscallSucceeds());
+ int status;
+ ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << "status " << status;
+}
+
+TEST(SeccompTest, RetTraceWithoutPtracerReturnsENOSYS) {
+ pid_t const pid = fork();
+ if (pid == 0) {
+ ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_TRACE);
+ TEST_CHECK(syscall(kFilteredSyscall) == -1 && errno == ENOSYS);
+ _exit(0);
+ }
+ ASSERT_THAT(pid, SyscallSucceeds());
+ int status;
+ ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << "status " << status;
+}
+
+TEST(SeccompTest, RetErrnoReturnsErrno) {
+ pid_t const pid = fork();
+ if (pid == 0) {
+ // ENOTNAM: "Not a XENIX named type file"
+ ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_ERRNO | ENOTNAM);
+ TEST_CHECK(syscall(kFilteredSyscall) == -1 && errno == ENOTNAM);
+ _exit(0);
+ }
+ ASSERT_THAT(pid, SyscallSucceeds());
+ int status;
+ ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << "status " << status;
+}
+
+TEST(SeccompTest, RetAllowAllowsSyscall) {
+ pid_t const pid = fork();
+ if (pid == 0) {
+ ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_ALLOW);
+ TEST_CHECK(syscall(kFilteredSyscall) == -1 && errno == ENOSYS);
+ _exit(0);
+ }
+ ASSERT_THAT(pid, SyscallSucceeds());
+ int status;
+ ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << "status " << status;
+}
+
+// This test will validate that TSYNC will apply to all threads.
+TEST(SeccompTest, TsyncAppliesToAllThreads) {
+ Mapping stack = ASSERT_NO_ERRNO_AND_VALUE(
+ MmapAnon(2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
+
+ // We don't want to apply this policy to other test runner threads, so fork.
+ const pid_t pid = fork();
+
+ if (pid == 0) {
+ // First check that we receive a ENOSYS before the policy is applied.
+ TEST_CHECK(syscall(kFilteredSyscall) == -1 && errno == ENOSYS);
+
+ // N.B. clone(2) is not officially async-signal-safe, but at minimum glibc's
+ // x86_64 implementation is safe. See glibc
+ // sysdeps/unix/sysv/linux/x86_64/clone.S.
+ clone(
+ +[](void* arg) {
+ ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_ERRNO | ENOTNAM,
+ SECCOMP_FILTER_FLAG_TSYNC);
+ return 0;
+ },
+ stack.endptr(),
+ CLONE_FILES | CLONE_FS | CLONE_SIGHAND | CLONE_THREAD | CLONE_VM |
+ CLONE_VFORK,
+ nullptr);
+
+ // Because we're using CLONE_VFORK this thread will be blocked until
+ // the second thread has released resources to our virtual memory, since
+ // we're not execing that will happen on _exit.
+
+ // Now verify that the policy applied to this thread too.
+ TEST_CHECK(syscall(kFilteredSyscall) == -1 && errno == ENOTNAM);
+ _exit(0);
+ }
+
+ ASSERT_THAT(pid, SyscallSucceeds());
+ int status = 0;
+ ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << "status " << status;
+}
+
+// This test will validate that seccomp(2) rejects unsupported flags.
+TEST(SeccompTest, SeccompRejectsUnknownFlags) {
+ constexpr uint32_t kInvalidFlag = 123;
+ ASSERT_THAT(
+ syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER, kInvalidFlag, nullptr),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(SeccompTest, LeastPermissiveFilterReturnValueApplies) {
+ // This is RetKillCausesDeathBySIGSYS, plus extra filters before and after the
+ // one that causes the kill that should be ignored.
+ pid_t const pid = fork();
+ if (pid == 0) {
+ RegisterSignalHandler(SIGSYS, +[](int, siginfo_t*, void*) { _exit(1); });
+ ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_TRACE);
+ ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_KILL);
+ ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_ERRNO | ENOTNAM);
+ syscall(kFilteredSyscall);
+ TEST_CHECK_MSG(false, "Survived invocation of test syscall");
+ }
+ ASSERT_THAT(pid, SyscallSucceeds());
+ int status;
+ ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+ EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGSYS)
+ << "status " << status;
+}
+
+// Passed as argv[1] to cause the test binary to invoke kFilteredSyscall and
+// exit. Not a real flag since flag parsing happens during initialization,
+// which may create threads.
+constexpr char kInvokeFilteredSyscallFlag[] = "--seccomp_test_child";
+
+TEST(SeccompTest, FiltersPreservedAcrossForkAndExecve) {
+ ExecveArray const grandchild_argv(
+ {"/proc/self/exe", kInvokeFilteredSyscallFlag});
+
+ pid_t const pid = fork();
+ if (pid == 0) {
+ ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_KILL);
+ pid_t const grandchild_pid = fork();
+ if (grandchild_pid == 0) {
+ execve(grandchild_argv.get()[0], grandchild_argv.get(),
+ /* envp = */ nullptr);
+ TEST_PCHECK_MSG(false, "execve failed");
+ }
+ int status;
+ TEST_PCHECK(waitpid(grandchild_pid, &status, 0) == grandchild_pid);
+ TEST_CHECK(WIFSIGNALED(status) && WTERMSIG(status) == SIGSYS);
+ _exit(0);
+ }
+ ASSERT_THAT(pid, SyscallSucceeds());
+ int status;
+ ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << "status " << status;
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
+
+int main(int argc, char** argv) {
+ if (argc >= 2 &&
+ strcmp(argv[1], gvisor::testing::kInvokeFilteredSyscallFlag) == 0) {
+ syscall(gvisor::testing::kFilteredSyscall);
+ exit(0);
+ }
+
+ gvisor::testing::TestInit(&argc, &argv);
+ return RUN_ALL_TESTS();
+}
diff --git a/test/syscalls/linux/select.cc b/test/syscalls/linux/select.cc
new file mode 100644
index 000000000..6b6fa9217
--- /dev/null
+++ b/test/syscalls/linux/select.cc
@@ -0,0 +1,128 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <limits.h>
+#include <signal.h>
+#include <sys/select.h>
+
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "absl/time/time.h"
+#include "test/syscalls/linux/base_poll_test.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+class SelectTest : public BasePollTest {
+ protected:
+ void SetUp() override { BasePollTest::SetUp(); }
+ void TearDown() override { BasePollTest::TearDown(); }
+};
+
+// See that when there are no FD sets, select behaves like sleep.
+TEST_F(SelectTest, NullFds) {
+ struct timeval timeout = absl::ToTimeval(absl::Milliseconds(10));
+ ASSERT_THAT(select(0, nullptr, nullptr, nullptr, &timeout),
+ SyscallSucceeds());
+ EXPECT_EQ(timeout.tv_sec, 0);
+ EXPECT_EQ(timeout.tv_usec, 0);
+
+ timeout = absl::ToTimeval(absl::Milliseconds(10));
+ ASSERT_THAT(select(1, nullptr, nullptr, nullptr, &timeout),
+ SyscallSucceeds());
+ EXPECT_EQ(timeout.tv_sec, 0);
+ EXPECT_EQ(timeout.tv_usec, 0);
+}
+
+TEST_F(SelectTest, NegativeNfds) {
+ EXPECT_THAT(select(-1, nullptr, nullptr, nullptr, nullptr),
+ SyscallFailsWithErrno(EINVAL));
+ EXPECT_THAT(select(-100000, nullptr, nullptr, nullptr, nullptr),
+ SyscallFailsWithErrno(EINVAL));
+ EXPECT_THAT(select(INT_MIN, nullptr, nullptr, nullptr, nullptr),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_F(SelectTest, ClosedFds) {
+ fd_set read_set;
+ FD_ZERO(&read_set);
+ int fd;
+ ASSERT_THAT(fd = dup(1), SyscallSucceeds());
+ ASSERT_THAT(close(fd), SyscallSucceeds());
+ FD_SET(fd, &read_set);
+ struct timeval timeout = absl::ToTimeval(absl::Milliseconds(10));
+ EXPECT_THAT(select(fd + 1, &read_set, nullptr, nullptr, &timeout),
+ SyscallFailsWithErrno(EBADF));
+}
+
+TEST_F(SelectTest, ZeroTimeout) {
+ struct timeval timeout = {};
+ EXPECT_THAT(select(1, nullptr, nullptr, nullptr, &timeout),
+ SyscallSucceeds());
+ // Ignore timeout as its value is now undefined.
+}
+
+// If random S/R interrupts the select, SIGALRM may be delivered before select
+// restarts, causing the select to hang forever.
+TEST_F(SelectTest, NoTimeout_NoRandomSave) {
+ // When there's no timeout, select may never return so set a timer.
+ SetTimer(absl::Milliseconds(100));
+ // See that we get interrupted by the timer.
+ ASSERT_THAT(select(1, nullptr, nullptr, nullptr, nullptr),
+ SyscallFailsWithErrno(EINTR));
+ EXPECT_TRUE(TimerFired());
+}
+
+TEST_F(SelectTest, InvalidTimeoutNegative) {
+ struct timeval timeout = absl::ToTimeval(absl::Microseconds(-1));
+ EXPECT_THAT(select(1, nullptr, nullptr, nullptr, &timeout),
+ SyscallFailsWithErrno(EINVAL));
+ // Ignore timeout as its value is now undefined.
+}
+
+// Verify that a signal interrupts select.
+//
+// If random S/R interrupts the select, SIGALRM may be delivered before select
+// restarts, causing the select to hang forever.
+TEST_F(SelectTest, InterruptedBySignal_NoRandomSave) {
+ absl::Duration duration(absl::Seconds(5));
+ struct timeval timeout = absl::ToTimeval(duration);
+ SetTimer(absl::Milliseconds(100));
+ ASSERT_FALSE(TimerFired());
+ ASSERT_THAT(select(1, nullptr, nullptr, nullptr, &timeout),
+ SyscallFailsWithErrno(EINTR));
+ EXPECT_TRUE(TimerFired());
+ // Ignore timeout as its value is now undefined.
+}
+
+TEST_F(SelectTest, IgnoreBitsAboveNfds) {
+ // fd_set is a bit array with at least FD_SETSIZE bits. Test that bits
+ // corresponding to file descriptors above nfds are ignored.
+ fd_set read_set;
+ FD_ZERO(&read_set);
+ constexpr int kNfds = 1;
+ for (int fd = kNfds; fd < FD_SETSIZE; fd++) {
+ FD_SET(fd, &read_set);
+ }
+ // Pass a zero timeout so that select returns immediately.
+ struct timeval timeout = {};
+ EXPECT_THAT(select(kNfds, &read_set, nullptr, nullptr, &timeout),
+ SyscallSucceedsWithValue(0));
+}
+
+} // namespace
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/semaphore.cc b/test/syscalls/linux/semaphore.cc
new file mode 100644
index 000000000..12e33732d
--- /dev/null
+++ b/test/syscalls/linux/semaphore.cc
@@ -0,0 +1,438 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/ipc.h>
+#include <sys/sem.h>
+#include <sys/types.h>
+#include <atomic>
+#include <cerrno>
+#include <ctime>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/base/macros.h"
+#include "absl/synchronization/mutex.h"
+#include "absl/time/clock.h"
+#include "test/util/capability_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+class AutoSem {
+ public:
+ explicit AutoSem(int id) : id_(id) {}
+ ~AutoSem() {
+ if (id_ >= 0) {
+ EXPECT_THAT(semctl(id_, 0, IPC_RMID), SyscallSucceeds());
+ }
+ }
+
+ int release() {
+ int old = id_;
+ id_ = -1;
+ return old;
+ }
+
+ int get() { return id_; }
+
+ private:
+ int id_ = -1;
+};
+
+TEST(SemaphoreTest, SemGet) {
+ // Test creation and lookup.
+ AutoSem sem(semget(1, 10, IPC_CREAT));
+ ASSERT_THAT(sem.get(), SyscallSucceeds());
+ EXPECT_THAT(semget(1, 10, IPC_CREAT), SyscallSucceedsWithValue(sem.get()));
+ EXPECT_THAT(semget(1, 9, IPC_CREAT), SyscallSucceedsWithValue(sem.get()));
+
+ // Creation and lookup failure cases.
+ EXPECT_THAT(semget(1, 11, IPC_CREAT), SyscallFailsWithErrno(EINVAL));
+ EXPECT_THAT(semget(1, -1, IPC_CREAT), SyscallFailsWithErrno(EINVAL));
+ EXPECT_THAT(semget(1, 10, IPC_CREAT | IPC_EXCL),
+ SyscallFailsWithErrno(EEXIST));
+ EXPECT_THAT(semget(2, 1, 0), SyscallFailsWithErrno(ENOENT));
+ EXPECT_THAT(semget(2, 0, IPC_CREAT), SyscallFailsWithErrno(EINVAL));
+
+ // Private semaphores never conflict.
+ AutoSem sem2(semget(IPC_PRIVATE, 1, 0));
+ AutoSem sem3(semget(IPC_PRIVATE, 1, 0));
+ ASSERT_THAT(sem2.get(), SyscallSucceeds());
+ EXPECT_NE(sem.get(), sem2.get());
+ ASSERT_THAT(sem3.get(), SyscallSucceeds());
+ EXPECT_NE(sem3.get(), sem2.get());
+}
+
+// Tests simple operations that shouldn't block in a single-thread.
+TEST(SemaphoreTest, SemOpSingleNoBlock) {
+ AutoSem sem(semget(IPC_PRIVATE, 1, 0600 | IPC_CREAT));
+ ASSERT_THAT(sem.get(), SyscallSucceeds());
+
+ struct sembuf buf = {};
+ buf.sem_op = 1;
+ ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallSucceeds());
+
+ buf.sem_op = -1;
+ ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallSucceeds());
+
+ buf.sem_op = 0;
+ ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallSucceeds());
+
+ // Error cases with invalid values.
+ ASSERT_THAT(semop(sem.get() + 1, &buf, 1), SyscallFailsWithErrno(EINVAL));
+
+ buf.sem_num = 1;
+ ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallFailsWithErrno(EFBIG));
+
+ ASSERT_THAT(semop(sem.get(), nullptr, 0), SyscallFailsWithErrno(EINVAL));
+}
+
+// Tests multiple operations that shouldn't block in a single-thread.
+TEST(SemaphoreTest, SemOpMultiNoBlock) {
+ AutoSem sem(semget(IPC_PRIVATE, 4, 0600 | IPC_CREAT));
+ ASSERT_THAT(sem.get(), SyscallSucceeds());
+
+ struct sembuf bufs[5] = {};
+ bufs[0].sem_num = 0;
+ bufs[0].sem_op = 10;
+ bufs[0].sem_flg = 0;
+
+ bufs[1].sem_num = 1;
+ bufs[1].sem_op = 2;
+ bufs[1].sem_flg = 0;
+
+ bufs[2].sem_num = 2;
+ bufs[2].sem_op = 3;
+ bufs[2].sem_flg = 0;
+
+ bufs[3].sem_num = 0;
+ bufs[3].sem_op = -5;
+ bufs[3].sem_flg = 0;
+
+ bufs[4].sem_num = 2;
+ bufs[4].sem_op = 2;
+ bufs[4].sem_flg = 0;
+
+ ASSERT_THAT(semop(sem.get(), bufs, ABSL_ARRAYSIZE(bufs)), SyscallSucceeds());
+
+ ASSERT_THAT(semctl(sem.get(), 0, GETVAL), SyscallSucceedsWithValue(5));
+ ASSERT_THAT(semctl(sem.get(), 1, GETVAL), SyscallSucceedsWithValue(2));
+ ASSERT_THAT(semctl(sem.get(), 2, GETVAL), SyscallSucceedsWithValue(5));
+ ASSERT_THAT(semctl(sem.get(), 3, GETVAL), SyscallSucceedsWithValue(0));
+
+ for (auto& b : bufs) {
+ b.sem_op = -b.sem_op;
+ }
+ // 0 and 3 order must be reversed, otherwise it will block.
+ std::swap(bufs[0].sem_op, bufs[3].sem_op);
+ ASSERT_THAT(RetryEINTR(semop)(sem.get(), bufs, ABSL_ARRAYSIZE(bufs)),
+ SyscallSucceeds());
+
+ // All semaphores should be back to 0 now.
+ for (size_t i = 0; i < 4; ++i) {
+ ASSERT_THAT(semctl(sem.get(), i, GETVAL), SyscallSucceedsWithValue(0));
+ }
+}
+
+// Makes a best effort attempt to ensure that operation would block.
+TEST(SemaphoreTest, SemOpBlock) {
+ AutoSem sem(semget(IPC_PRIVATE, 1, 0600 | IPC_CREAT));
+ ASSERT_THAT(sem.get(), SyscallSucceeds());
+
+ std::atomic<int> blocked = ATOMIC_VAR_INIT(1);
+ ScopedThread th([&sem, &blocked] {
+ absl::SleepFor(absl::Milliseconds(100));
+ ASSERT_EQ(blocked.load(), 1);
+
+ struct sembuf buf = {};
+ buf.sem_op = 1;
+ ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallSucceeds());
+ });
+
+ struct sembuf buf = {};
+ buf.sem_op = -1;
+ ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallSucceeds());
+ blocked.store(0);
+}
+
+// Tests that IPC_NOWAIT returns with no wait.
+TEST(SemaphoreTest, SemOpNoBlock) {
+ AutoSem sem(semget(IPC_PRIVATE, 1, 0600 | IPC_CREAT));
+ ASSERT_THAT(sem.get(), SyscallSucceeds());
+
+ struct sembuf buf = {};
+ buf.sem_flg = IPC_NOWAIT;
+
+ buf.sem_op = -1;
+ ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallFailsWithErrno(EAGAIN));
+
+ buf.sem_op = 1;
+ ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallSucceeds());
+
+ buf.sem_op = 0;
+ ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallFailsWithErrno(EAGAIN));
+}
+
+// Test runs 2 threads, one signals the other waits the same number of times.
+TEST(SemaphoreTest, SemOpSimple) {
+ AutoSem sem(semget(IPC_PRIVATE, 1, 0600 | IPC_CREAT));
+ ASSERT_THAT(sem.get(), SyscallSucceeds());
+
+ constexpr size_t kLoops = 100;
+ ScopedThread th([&sem] {
+ struct sembuf buf = {};
+ buf.sem_op = 1;
+ for (size_t i = 0; i < kLoops; i++) {
+ // Sleep to prevent making all increments in one shot without letting
+ // the waiter wait.
+ absl::SleepFor(absl::Milliseconds(1));
+ ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallSucceeds());
+ }
+ });
+
+ struct sembuf buf = {};
+ buf.sem_op = -1;
+ for (size_t i = 0; i < kLoops; i++) {
+ ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallSucceeds());
+ }
+}
+
+// Tests that semaphore can be removed while there are waiters.
+// NoRandomSave: Test relies on timing that random save throws off.
+TEST(SemaphoreTest, SemOpRemoveWithWaiter_NoRandomSave) {
+ AutoSem sem(semget(IPC_PRIVATE, 2, 0600 | IPC_CREAT));
+ ASSERT_THAT(sem.get(), SyscallSucceeds());
+
+ ScopedThread th([&sem] {
+ absl::SleepFor(absl::Milliseconds(250));
+ ASSERT_THAT(semctl(sem.release(), 0, IPC_RMID), SyscallSucceeds());
+ });
+
+ // This must happen before IPC_RMID runs above. Otherwise it fails with EINVAL
+ // instead because the semaphire has already been removed.
+ struct sembuf buf = {};
+ buf.sem_op = -1;
+ ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1),
+ SyscallFailsWithErrno(EIDRM));
+}
+
+// Semaphore isn't fair. It will execute any waiter that can satisfy the
+// request even if it gets in front of other waiters.
+TEST(SemaphoreTest, SemOpBestFitExecution) {
+ AutoSem sem(semget(IPC_PRIVATE, 1, 0600 | IPC_CREAT));
+ ASSERT_THAT(sem.get(), SyscallSucceeds());
+
+ ScopedThread th([&sem] {
+ struct sembuf buf = {};
+ buf.sem_op = -2;
+ ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallFails());
+ // Ensure that wait will only unblock when the semaphore is removed. On
+ // EINTR retry it may race with deletion and return EINVAL.
+ ASSERT_TRUE(errno == EIDRM || errno == EINVAL) << "errno=" << errno;
+ });
+
+ // Ensures that '-1' below will unblock even though '-10' above is waiting
+ // for the same semaphore.
+ for (size_t i = 0; i < 10; ++i) {
+ struct sembuf buf = {};
+ buf.sem_op = 1;
+ ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallSucceeds());
+
+ absl::SleepFor(absl::Milliseconds(10));
+
+ buf.sem_op = -1;
+ ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallSucceeds());
+ }
+
+ ASSERT_THAT(semctl(sem.release(), 0, IPC_RMID), SyscallSucceeds());
+}
+
+// Executes random operations in multiple threads and verify correctness.
+TEST(SemaphoreTest, SemOpRandom) {
+ // Don't do cooperative S/R tests because there are too many syscalls in
+ // this test,
+ const DisableSave ds;
+
+ AutoSem sem(semget(IPC_PRIVATE, 1, 0600 | IPC_CREAT));
+ ASSERT_THAT(sem.get(), SyscallSucceeds());
+
+ // Protects the seed below.
+ absl::Mutex mutex;
+ uint32_t seed = time(nullptr);
+
+ int count = 0; // Tracks semaphore value.
+ bool done = false; // Tells waiters to stop after signal threads are done.
+
+ // These threads will wait in a loop.
+ std::unique_ptr<ScopedThread> decs[5];
+ for (auto& dec : decs) {
+ dec = absl::make_unique<ScopedThread>([&sem, &mutex, &count, &seed, &done] {
+ for (size_t i = 0; i < 500; ++i) {
+ int16_t val;
+ {
+ absl::MutexLock l(&mutex);
+ if (done) {
+ return;
+ }
+ val = (rand_r(&seed) % 10 + 1); // Rand between 1 and 10.
+ count -= val;
+ }
+ struct sembuf buf = {};
+ buf.sem_op = -val;
+ ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallSucceeds());
+ absl::SleepFor(absl::Milliseconds(val * 2));
+ }
+ });
+ }
+
+ // These threads will wait for zero in a loop.
+ std::unique_ptr<ScopedThread> zeros[5];
+ for (auto& zero : zeros) {
+ zero = absl::make_unique<ScopedThread>([&sem, &mutex, &done] {
+ for (size_t i = 0; i < 500; ++i) {
+ {
+ absl::MutexLock l(&mutex);
+ if (done) {
+ return;
+ }
+ }
+ struct sembuf buf = {};
+ buf.sem_op = 0;
+ ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallSucceeds());
+ absl::SleepFor(absl::Milliseconds(10));
+ }
+ });
+ }
+
+ // These threads will signal in a loop.
+ std::unique_ptr<ScopedThread> incs[5];
+ for (auto& inc : incs) {
+ inc = absl::make_unique<ScopedThread>([&sem, &mutex, &count, &seed] {
+ for (size_t i = 0; i < 500; ++i) {
+ int16_t val;
+ {
+ absl::MutexLock l(&mutex);
+ val = (rand_r(&seed) % 10 + 1); // Rand between 1 and 10.
+ count += val;
+ }
+ struct sembuf buf = {};
+ buf.sem_op = val;
+ ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallSucceeds());
+ absl::SleepFor(absl::Milliseconds(val * 2));
+ }
+ });
+ }
+
+ // First wait for signal threads to be done.
+ for (auto& inc : incs) {
+ inc->Join();
+ }
+
+ // Now there could be waiters blocked (remember operations are random).
+ // Notify waiters that we're done and signal semaphore just the right amount.
+ {
+ absl::MutexLock l(&mutex);
+ done = true;
+ struct sembuf buf = {};
+ buf.sem_op = -count;
+ ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallSucceeds());
+ }
+
+ // Now all waiters should unblock and exit.
+ for (auto& dec : decs) {
+ dec->Join();
+ }
+ for (auto& zero : zeros) {
+ zero->Join();
+ }
+}
+
+TEST(SemaphoreTest, SemOpNamespace) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+
+ AutoSem sem(semget(123, 1, 0600 | IPC_CREAT | IPC_EXCL));
+ ASSERT_THAT(sem.get(), SyscallSucceeds());
+
+ ScopedThread([]() {
+ EXPECT_THAT(unshare(CLONE_NEWIPC), SyscallSucceeds());
+ AutoSem sem(semget(123, 1, 0600 | IPC_CREAT | IPC_EXCL));
+ ASSERT_THAT(sem.get(), SyscallSucceeds());
+ });
+}
+
+TEST(SemaphoreTest, SemCtlVal) {
+ AutoSem sem(semget(IPC_PRIVATE, 1, 0600 | IPC_CREAT));
+ ASSERT_THAT(sem.get(), SyscallSucceeds());
+
+ // Semaphore must start with 0.
+ EXPECT_THAT(semctl(sem.get(), 0, GETVAL), SyscallSucceedsWithValue(0));
+
+ // Increase value and ensure waiters are woken up.
+ ScopedThread th([&sem] {
+ struct sembuf buf = {};
+ buf.sem_op = -10;
+ ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallSucceeds());
+ });
+
+ ASSERT_THAT(semctl(sem.get(), 0, SETVAL, 9), SyscallSucceeds());
+ EXPECT_THAT(semctl(sem.get(), 0, GETVAL), SyscallSucceedsWithValue(9));
+
+ ASSERT_THAT(semctl(sem.get(), 0, SETVAL, 20), SyscallSucceeds());
+ const int value = semctl(sem.get(), 0, GETVAL);
+ // 10 or 20 because it could have raced with waiter above.
+ EXPECT_TRUE(value == 10 || value == 20) << "value=" << value;
+ th.Join();
+
+ // Set it back to 0 and ensure that waiters are woken up.
+ ScopedThread thZero([&sem] {
+ struct sembuf buf = {};
+ buf.sem_op = 0;
+ ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallSucceeds());
+ });
+ ASSERT_THAT(semctl(sem.get(), 0, SETVAL, 0), SyscallSucceeds());
+ EXPECT_THAT(semctl(sem.get(), 0, GETVAL), SyscallSucceedsWithValue(0));
+ thZero.Join();
+}
+
+TEST(SemaphoreTest, SemIpcSet) {
+ // Drop CAP_IPC_OWNER which allows us to bypass semaphore permissions.
+ ASSERT_NO_ERRNO(SetCapability(CAP_IPC_OWNER, false));
+
+ AutoSem sem(semget(IPC_PRIVATE, 1, 0600 | IPC_CREAT));
+ ASSERT_THAT(sem.get(), SyscallSucceeds());
+
+ struct semid_ds semid = {};
+ semid.sem_perm.uid = getuid();
+ semid.sem_perm.gid = getgid();
+
+ // Make semaphore readonly and check that signal fails.
+ semid.sem_perm.mode = 0400;
+ EXPECT_THAT(semctl(sem.get(), 0, IPC_SET, &semid), SyscallSucceeds());
+ struct sembuf buf = {};
+ buf.sem_op = 1;
+ ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallFailsWithErrno(EACCES));
+
+ // Make semaphore writeonly and check that wait for zero fails.
+ semid.sem_perm.mode = 0200;
+ EXPECT_THAT(semctl(sem.get(), 0, IPC_SET, &semid), SyscallSucceeds());
+ buf.sem_op = 0;
+ ASSERT_THAT(semop(sem.get(), &buf, 1), SyscallFailsWithErrno(EACCES));
+}
+
+} // namespace
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/sendfile.cc b/test/syscalls/linux/sendfile.cc
new file mode 100644
index 000000000..92b7b9478
--- /dev/null
+++ b/test/syscalls/linux/sendfile.cc
@@ -0,0 +1,409 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <sys/sendfile.h>
+#include <unistd.h>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/strings/string_view.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(SendFileTest, SendZeroBytes) {
+ // Create temp files.
+ const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+ // Open the input file as read only.
+ const FileDescriptor inf =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY));
+
+ // Open the output file as write only.
+ const FileDescriptor outf =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY));
+
+ // Send data and verify that sendfile returns the correct value.
+ EXPECT_THAT(sendfile(outf.get(), inf.get(), nullptr, 0),
+ SyscallSucceedsWithValue(0));
+}
+
+TEST(SendFileTest, SendTrivially) {
+ // Create temp files.
+ constexpr char kData[] = "To be, or not to be, that is the question:";
+ constexpr int kDataSize = sizeof(kData) - 1;
+ const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode));
+ const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+ // Open the input file as read only.
+ const FileDescriptor inf =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY));
+
+ // Open the output file as write only.
+ FileDescriptor outf;
+ outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY));
+
+ // Send data and verify that sendfile returns the correct value.
+ int bytes_sent;
+ EXPECT_THAT(bytes_sent = sendfile(outf.get(), inf.get(), nullptr, kDataSize),
+ SyscallSucceedsWithValue(kDataSize));
+
+ // Close outf to avoid leak.
+ outf.reset();
+
+ // Open the output file as read only.
+ outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDONLY));
+
+ // Verify that the output file has the correct data.
+ char actual[kDataSize];
+ ASSERT_THAT(read(outf.get(), &actual, bytes_sent),
+ SyscallSucceedsWithValue(kDataSize));
+ EXPECT_EQ(kData, absl::string_view(actual, bytes_sent));
+}
+
+TEST(SendFileTest, SendTriviallyWithBothFilesReadWrite) {
+ // Create temp files.
+ constexpr char kData[] = "Whether 'tis nobler in the mind to suffer";
+ constexpr int kDataSize = sizeof(kData) - 1;
+ const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode));
+ const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+ // Open the input file as readwrite.
+ const FileDescriptor inf =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDWR));
+
+ // Open the output file as readwrite.
+ FileDescriptor outf;
+ outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDWR));
+
+ // Send data and verify that sendfile returns the correct value.
+ int bytes_sent;
+ EXPECT_THAT(bytes_sent = sendfile(outf.get(), inf.get(), nullptr, kDataSize),
+ SyscallSucceedsWithValue(kDataSize));
+
+ // Close outf to avoid leak.
+ outf.reset();
+
+ // Open the output file as read only.
+ outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDONLY));
+
+ // Verify that the output file has the correct data.
+ char actual[kDataSize];
+ ASSERT_THAT(read(outf.get(), &actual, bytes_sent),
+ SyscallSucceedsWithValue(kDataSize));
+ EXPECT_EQ(kData, absl::string_view(actual, bytes_sent));
+}
+
+TEST(SendFileTest, SendAndUpdateFileOffset) {
+ // Create temp files.
+ // Test input std::string length must be > 2 AND even.
+ constexpr char kData[] = "The slings and arrows of outrageous fortune,";
+ constexpr int kDataSize = sizeof(kData) - 1;
+ constexpr int kHalfDataSize = kDataSize / 2;
+ const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode));
+ const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+ // Open the input file as read only.
+ const FileDescriptor inf =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY));
+
+ // Open the output file as write only.
+ FileDescriptor outf;
+ outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY));
+
+ // Send data and verify that sendfile returns the correct value.
+ int bytes_sent;
+ EXPECT_THAT(
+ bytes_sent = sendfile(outf.get(), inf.get(), nullptr, kHalfDataSize),
+ SyscallSucceedsWithValue(kHalfDataSize));
+
+ // Close outf to avoid leak.
+ outf.reset();
+
+ // Open the output file as read only.
+ outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDONLY));
+
+ // Verify that the output file has the correct data.
+ char actual[kHalfDataSize];
+ ASSERT_THAT(read(outf.get(), &actual, bytes_sent),
+ SyscallSucceedsWithValue(kHalfDataSize));
+ EXPECT_EQ(absl::string_view(kData, kHalfDataSize),
+ absl::string_view(actual, bytes_sent));
+
+ // Verify that the input file offset has been updated
+ ASSERT_THAT(read(inf.get(), &actual, kDataSize - bytes_sent),
+ SyscallSucceedsWithValue(kHalfDataSize));
+ EXPECT_EQ(
+ absl::string_view(kData + kDataSize - bytes_sent, kDataSize - bytes_sent),
+ absl::string_view(actual, kHalfDataSize));
+}
+
+TEST(SendFileTest, SendAndUpdateFileOffsetFromNonzeroStartingPoint) {
+ // Create temp files.
+ // Test input std::string length must be > 2 AND divisible by 4.
+ constexpr char kData[] = "The slings and arrows of outrageous fortune,";
+ constexpr int kDataSize = sizeof(kData) - 1;
+ constexpr int kHalfDataSize = kDataSize / 2;
+ constexpr int kQuarterDataSize = kHalfDataSize / 2;
+ const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode));
+ const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+ // Open the input file as read only.
+ const FileDescriptor inf =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY));
+
+ // Open the output file as write only.
+ FileDescriptor outf;
+ outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY));
+
+ // Read a quarter of the data from the infile which should update the file
+ // offset, we don't actually care about the data so it goes into the garbage.
+ char garbage[kQuarterDataSize];
+ ASSERT_THAT(read(inf.get(), &garbage, kQuarterDataSize),
+ SyscallSucceedsWithValue(kQuarterDataSize));
+
+ // Send data and verify that sendfile returns the correct value.
+ int bytes_sent;
+ EXPECT_THAT(
+ bytes_sent = sendfile(outf.get(), inf.get(), nullptr, kHalfDataSize),
+ SyscallSucceedsWithValue(kHalfDataSize));
+
+ // Close out_fd to avoid leak.
+ outf.reset();
+
+ // Open the output file as read only.
+ outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDONLY));
+
+ // Verify that the output file has the correct data.
+ char actual[kHalfDataSize];
+ ASSERT_THAT(read(outf.get(), &actual, bytes_sent),
+ SyscallSucceedsWithValue(kHalfDataSize));
+ EXPECT_EQ(absl::string_view(kData + kQuarterDataSize, kHalfDataSize),
+ absl::string_view(actual, bytes_sent));
+
+ // Verify that the input file offset has been updated
+ ASSERT_THAT(read(inf.get(), &actual, kQuarterDataSize),
+ SyscallSucceedsWithValue(kQuarterDataSize));
+
+ EXPECT_EQ(
+ absl::string_view(kData + kDataSize - kQuarterDataSize, kQuarterDataSize),
+ absl::string_view(actual, kQuarterDataSize));
+}
+
+TEST(SendFileTest, SendAndUpdateGivenOffset) {
+ // Create temp files.
+ // Test input std::string length must be >= 4 AND divisible by 4.
+ constexpr char kData[] = "Or to take Arms against a Sea of troubles,";
+ constexpr int kDataSize = sizeof(kData) + 1;
+ constexpr int kHalfDataSize = kDataSize / 2;
+ constexpr int kQuarterDataSize = kHalfDataSize / 2;
+ constexpr int kThreeFourthsDataSize = 3 * kDataSize / 4;
+
+ const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode));
+ const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+ // Open the input file as read only.
+ const FileDescriptor inf =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY));
+
+ // Open the output file as write only.
+ FileDescriptor outf;
+ outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY));
+
+ // Create offset for sending.
+ off_t offset = kQuarterDataSize;
+
+ // Send data and verify that sendfile returns the correct value.
+ int bytes_sent;
+ EXPECT_THAT(
+ bytes_sent = sendfile(outf.get(), inf.get(), &offset, kHalfDataSize),
+ SyscallSucceedsWithValue(kHalfDataSize));
+
+ // Close out_fd to avoid leak.
+ outf.reset();
+
+ // Open the output file as read only.
+ outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDONLY));
+
+ // Verify that the output file has the correct data.
+ char actual[kHalfDataSize];
+ ASSERT_THAT(read(outf.get(), &actual, bytes_sent),
+ SyscallSucceedsWithValue(kHalfDataSize));
+ EXPECT_EQ(absl::string_view(kData + kQuarterDataSize, kHalfDataSize),
+ absl::string_view(actual, bytes_sent));
+
+ // Verify that the input file offset has NOT been updated.
+ ASSERT_THAT(read(inf.get(), &actual, kHalfDataSize),
+ SyscallSucceedsWithValue(kHalfDataSize));
+ EXPECT_EQ(absl::string_view(kData, kHalfDataSize),
+ absl::string_view(actual, kHalfDataSize));
+
+ // Verify that the offset pointer has been updated.
+ EXPECT_EQ(offset, kThreeFourthsDataSize);
+}
+
+TEST(SendFileTest, DoNotSendfileIfOutfileIsAppendOnly) {
+ // Create temp files.
+ constexpr char kData[] = "And by opposing end them: to die, to sleep";
+ constexpr int kDataSize = sizeof(kData) - 1;
+
+ const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode));
+ const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+ // Open the input file as read only.
+ const FileDescriptor inf =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY));
+
+ // Open the output file as append only.
+ const FileDescriptor outf =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_APPEND));
+
+ // Send data and verify that sendfile returns the correct errno.
+ EXPECT_THAT(sendfile(outf.get(), inf.get(), nullptr, kDataSize),
+ SyscallFailsWithErrno(EBADF));
+}
+
+TEST(SendFileTest, DoNotSendfileIfOutfileIsNotWritable) {
+ // Create temp files.
+ constexpr char kData[] = "No more; and by a sleep, to say we end";
+ constexpr int kDataSize = sizeof(kData) - 1;
+
+ const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode));
+ const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+ // Open the input file as read only.
+ const FileDescriptor inf =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY));
+
+ // Open the output file as read only.
+ const FileDescriptor outf =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDONLY));
+
+ // Send data and verify that sendfile returns the correct errno.
+ EXPECT_THAT(sendfile(outf.get(), inf.get(), nullptr, kDataSize),
+ SyscallFailsWithErrno(EBADF));
+}
+
+TEST(SendFileTest, DoNotSendfileIfInfileIsNotReadable) {
+ // Create temp files.
+ constexpr char kData[] = "the heart-ache, and the thousand natural shocks";
+ constexpr int kDataSize = sizeof(kData) - 1;
+
+ const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode));
+ const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+ // Open the input file as write only.
+ const FileDescriptor inf =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_WRONLY));
+
+ // Open the output file as write only.
+ const FileDescriptor outf =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY));
+
+ // Send data and verify that sendfile returns the correct errno.
+ EXPECT_THAT(sendfile(outf.get(), inf.get(), nullptr, kDataSize),
+ SyscallFailsWithErrno(EBADF));
+}
+
+TEST(SendFileTest, DoNotSendANegativeNumberOfBytes) {
+ // Create temp files.
+ constexpr char kData[] = "that Flesh is heir to? 'Tis a consummation";
+
+ const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode));
+ const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+ // Open the input file as read only.
+ const FileDescriptor inf =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY));
+
+ // Open the output file as write only.
+ const FileDescriptor outf =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY));
+
+ // Send data and verify that sendfile returns the correct errno.
+ EXPECT_THAT(sendfile(outf.get(), inf.get(), nullptr, -1),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(SendFileTest, SendTheCorrectNumberOfBytesEvenIfWeTryToSendTooManyBytes) {
+ // Create temp files.
+ constexpr char kData[] = "devoutly to be wished. To die, to sleep,";
+ constexpr int kDataSize = sizeof(kData) - 1;
+
+ const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode));
+ const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+ // Open the input file as read only.
+ const FileDescriptor inf =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY));
+
+ // Open the output file as write only.
+ FileDescriptor outf;
+ outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY));
+
+ // Send data and verify that sendfile returns the correct value.
+ int bytes_sent;
+ EXPECT_THAT(
+ bytes_sent = sendfile(outf.get(), inf.get(), nullptr, kDataSize + 100),
+ SyscallSucceedsWithValue(kDataSize));
+
+ // Close outf to avoid leak.
+ outf.reset();
+
+ // Open the output file as read only.
+ outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDONLY));
+
+ // Verify that the output file has the correct data.
+ char actual[kDataSize];
+ ASSERT_THAT(read(outf.get(), &actual, bytes_sent),
+ SyscallSucceedsWithValue(kDataSize));
+ EXPECT_EQ(kData, absl::string_view(actual, bytes_sent));
+}
+
+TEST(SendFileTest, SendToNotARegularFile) {
+ // Make temp input directory and open as read only.
+ const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const FileDescriptor inf =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_RDONLY));
+
+ // Make temp output file and open as write only.
+ const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ const FileDescriptor outf =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY));
+
+ // Receive an error since a directory is not a regular file.
+ EXPECT_THAT(sendfile(outf.get(), inf.get(), nullptr, 0),
+ SyscallFailsWithErrno(EINVAL));
+}
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/sendfile_socket.cc b/test/syscalls/linux/sendfile_socket.cc
new file mode 100644
index 000000000..7010dc211
--- /dev/null
+++ b/test/syscalls/linux/sendfile_socket.cc
@@ -0,0 +1,156 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <sys/sendfile.h>
+#include <sys/socket.h>
+#include <unistd.h>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "absl/strings/string_view.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+// Sends large file to exercise the path that read and writes data multiple
+// times, esp. when more data is read than can be written.
+TEST(SendFileTest, SendMultiple) {
+ std::vector<char> data(5 * 1024 * 1024);
+ RandomizeBuffer(data.data(), data.size());
+
+ // Create temp files.
+ const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), absl::string_view(data.data(), data.size()),
+ TempPath::kDefaultFileMode));
+ const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+ // Use a socket for target file to make the write window small.
+ const FileDescriptor server(socket(AF_INET, SOCK_STREAM, IPPROTO_TCP));
+ ASSERT_THAT(server.get(), SyscallSucceeds());
+
+ struct sockaddr_in server_addr = {};
+ server_addr.sin_family = AF_INET;
+ server_addr.sin_addr.s_addr = INADDR_ANY;
+ ASSERT_THAT(
+ bind(server.get(), reinterpret_cast<struct sockaddr *>(&server_addr),
+ sizeof(server_addr)),
+ SyscallSucceeds());
+ ASSERT_THAT(listen(server.get(), 1), SyscallSucceeds());
+
+ // Thread that reads data from socket and dumps to a file.
+ ScopedThread th([&server, &out_file, &server_addr] {
+ socklen_t addrlen = sizeof(server_addr);
+ const FileDescriptor fd(RetryEINTR(accept)(
+ server.get(), reinterpret_cast<struct sockaddr *>(&server_addr),
+ &addrlen));
+ ASSERT_THAT(fd.get(), SyscallSucceeds());
+
+ FileDescriptor outf =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY));
+
+ // Read until socket is closed.
+ char buf[10240];
+ for (int cnt = 0;; cnt++) {
+ int r = RetryEINTR(read)(fd.get(), buf, sizeof(buf));
+ // We cannot afford to save on every read() call.
+ if (cnt % 1000 == 0) {
+ ASSERT_THAT(r, SyscallSucceeds());
+ } else {
+ const DisableSave ds;
+ ASSERT_THAT(r, SyscallSucceeds());
+ }
+ if (r == 0) {
+ // EOF
+ break;
+ }
+ int w = RetryEINTR(write)(outf.get(), buf, r);
+ // We cannot afford to save on every write() call.
+ if (cnt % 1010 == 0) {
+ ASSERT_THAT(w, SyscallSucceedsWithValue(r));
+ } else {
+ const DisableSave ds;
+ ASSERT_THAT(w, SyscallSucceedsWithValue(r));
+ }
+ }
+ });
+
+ // Open the input file as read only.
+ const FileDescriptor inf =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY));
+
+ FileDescriptor outf(socket(AF_INET, SOCK_STREAM, IPPROTO_TCP));
+ ASSERT_THAT(outf.get(), SyscallSucceeds());
+
+ // Get the port bound by the listening socket.
+ socklen_t addrlen = sizeof(server_addr);
+ ASSERT_THAT(getsockname(server.get(),
+ reinterpret_cast<sockaddr *>(&server_addr), &addrlen),
+ SyscallSucceeds());
+
+ struct sockaddr_in addr = {};
+ addr.sin_family = AF_INET;
+ addr.sin_addr.s_addr = inet_addr("127.0.0.1");
+ addr.sin_port = server_addr.sin_port;
+ LOG(INFO) << "Connecting on port=" << server_addr.sin_port;
+ ASSERT_THAT(
+ RetryEINTR(connect)(
+ outf.get(), reinterpret_cast<struct sockaddr *>(&addr), sizeof(addr)),
+ SyscallSucceeds());
+
+ int cnt = 0;
+ for (size_t sent = 0; sent < data.size(); cnt++) {
+ const size_t remain = data.size() - sent;
+ LOG(INFO) << "sendfile, size=" << data.size() << ", sent=" << sent
+ << ", remain=" << remain;
+
+ // Send data and verify that sendfile returns the correct value.
+ int res = sendfile(outf.get(), inf.get(), nullptr, remain);
+ // We cannot afford to save on every sendfile() call.
+ if (cnt % 120 == 0) {
+ MaybeSave();
+ }
+ if (res == 0) {
+ // EOF
+ break;
+ }
+ if (res > 0) {
+ sent += res;
+ } else {
+ ASSERT_TRUE(errno == EINTR || errno == EAGAIN) << "errno=" << errno;
+ }
+ }
+
+ // Close socket to stop thread.
+ outf.reset();
+ th.Join();
+
+ // Verify that the output file has the correct data.
+ outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDONLY));
+ std::vector<char> actual(data.size(), '\0');
+ ASSERT_THAT(RetryEINTR(read)(outf.get(), actual.data(), actual.size()),
+ SyscallSucceedsWithValue(actual.size()));
+ ASSERT_EQ(memcmp(data.data(), actual.data(), data.size()), 0);
+}
+
+} // namespace
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/shm.cc b/test/syscalls/linux/shm.cc
new file mode 100644
index 000000000..9f57476c9
--- /dev/null
+++ b/test/syscalls/linux/shm.cc
@@ -0,0 +1,445 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdio.h>
+
+#include <sys/ipc.h>
+#include <sys/mman.h>
+#include <sys/shm.h>
+#include <sys/types.h>
+
+#include "absl/time/clock.h"
+#include "test/util/multiprocess_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+using ::testing::_;
+
+const uint64_t kAllocSize = kPageSize * 128ULL;
+
+PosixErrorOr<int> Shmget(key_t key, size_t size, int shmflg) {
+ int id = shmget(key, size, shmflg);
+ if (id == -1) {
+ return PosixError(errno, "shmget() failed");
+ }
+ return id;
+}
+
+PosixErrorOr<char*> Shmat(int shmid, const void* shmaddr, int shmflg) {
+ const intptr_t addr =
+ reinterpret_cast<intptr_t>(shmat(shmid, shmaddr, shmflg));
+ if (addr == -1) {
+ return PosixError(errno, "shmat() failed");
+ }
+ return reinterpret_cast<char*>(addr);
+}
+
+PosixError Shmdt(const char* shmaddr) {
+ const int ret = shmdt(shmaddr);
+ if (ret == -1) {
+ return PosixError(errno, "shmdt() failed");
+ }
+ return NoError();
+}
+
+template <typename T>
+PosixErrorOr<int> Shmctl(int shmid, int cmd, T* buf) {
+ int ret = shmctl(shmid, cmd, reinterpret_cast<struct shmid_ds*>(buf));
+ if (ret == -1) {
+ return PosixError(errno, "shmctl() failed");
+ }
+ return ret;
+}
+
+TEST(ShmTest, AttachDetach) {
+ const int id = ASSERT_NO_ERRNO_AND_VALUE(
+ Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777));
+ struct shmid_ds attr;
+ ASSERT_NO_ERRNO(Shmctl(id, IPC_STAT, &attr));
+ EXPECT_EQ(attr.shm_segsz, kAllocSize);
+ EXPECT_EQ(attr.shm_nattch, 0);
+
+ const char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id, nullptr, 0));
+ ASSERT_NO_ERRNO(Shmctl(id, IPC_STAT, &attr));
+ EXPECT_EQ(attr.shm_nattch, 1);
+
+ const char* addr2 = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id, nullptr, 0));
+ ASSERT_NO_ERRNO(Shmctl(id, IPC_STAT, &attr));
+ EXPECT_EQ(attr.shm_nattch, 2);
+
+ ASSERT_NO_ERRNO(Shmdt(addr));
+ ASSERT_NO_ERRNO(Shmctl(id, IPC_STAT, &attr));
+ EXPECT_EQ(attr.shm_nattch, 1);
+
+ ASSERT_NO_ERRNO(Shmdt(addr2));
+ ASSERT_NO_ERRNO(Shmctl(id, IPC_STAT, &attr));
+ EXPECT_EQ(attr.shm_nattch, 0);
+}
+
+TEST(ShmTest, LookupByKey) {
+ const TempPath keyfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ const key_t key = ftok(keyfile.path().c_str(), 1);
+ const int id =
+ ASSERT_NO_ERRNO_AND_VALUE(Shmget(key, kAllocSize, IPC_CREAT | 0777));
+ const int id2 = ASSERT_NO_ERRNO_AND_VALUE(Shmget(key, kAllocSize, 0777));
+ EXPECT_EQ(id, id2);
+}
+
+TEST(ShmTest, DetachedSegmentsPersist) {
+ const int id = ASSERT_NO_ERRNO_AND_VALUE(
+ Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777));
+ char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id, nullptr, 0));
+ addr[0] = 'x';
+ ASSERT_NO_ERRNO(Shmdt(addr));
+
+ // We should be able to re-attach to the same segment and get our data back.
+ addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id, nullptr, 0));
+ EXPECT_EQ(addr[0], 'x');
+ ASSERT_NO_ERRNO(Shmdt(addr));
+}
+
+TEST(ShmTest, MultipleDetachFails) {
+ const int id = ASSERT_NO_ERRNO_AND_VALUE(
+ Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777));
+ const char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id, nullptr, 0));
+ ASSERT_NO_ERRNO(Shmdt(addr));
+ EXPECT_THAT(Shmdt(addr), PosixErrorIs(EINVAL, _));
+}
+
+TEST(ShmTest, IpcStat) {
+ const TempPath keyfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ const key_t key = ftok(keyfile.path().c_str(), 1);
+
+ const time_t start = time(nullptr);
+
+ const int id =
+ ASSERT_NO_ERRNO_AND_VALUE(Shmget(key, kAllocSize, IPC_CREAT | 0777));
+
+ const uid_t uid = getuid();
+ const gid_t gid = getgid();
+ const pid_t pid = getpid();
+
+ struct shmid_ds attr;
+ ASSERT_NO_ERRNO(Shmctl(id, IPC_STAT, &attr));
+
+ EXPECT_EQ(attr.shm_perm.__key, key);
+ EXPECT_EQ(attr.shm_perm.uid, uid);
+ EXPECT_EQ(attr.shm_perm.gid, gid);
+ EXPECT_EQ(attr.shm_perm.cuid, uid);
+ EXPECT_EQ(attr.shm_perm.cgid, gid);
+ EXPECT_EQ(attr.shm_perm.mode, 0777);
+
+ EXPECT_EQ(attr.shm_segsz, kAllocSize);
+
+ EXPECT_EQ(attr.shm_atime, 0);
+ EXPECT_EQ(attr.shm_dtime, 0);
+
+ // Change time is set on creation.
+ EXPECT_GE(attr.shm_ctime, start);
+
+ EXPECT_EQ(attr.shm_cpid, pid);
+ EXPECT_EQ(attr.shm_lpid, 0);
+
+ EXPECT_EQ(attr.shm_nattch, 0);
+
+ // The timestamps only have a resolution of seconds; slow down so we actually
+ // see the timestamps change.
+ absl::SleepFor(absl::Seconds(1));
+ const time_t pre_attach = time(nullptr);
+
+ const char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id, nullptr, 0));
+ ASSERT_NO_ERRNO(Shmctl(id, IPC_STAT, &attr));
+
+ EXPECT_GE(attr.shm_atime, pre_attach);
+ EXPECT_EQ(attr.shm_dtime, 0);
+ EXPECT_LT(attr.shm_ctime, pre_attach);
+ EXPECT_EQ(attr.shm_lpid, pid);
+ EXPECT_EQ(attr.shm_nattch, 1);
+
+ absl::SleepFor(absl::Seconds(1));
+ const time_t pre_detach = time(nullptr);
+
+ ASSERT_NO_ERRNO(Shmdt(addr));
+ ASSERT_NO_ERRNO(Shmctl(id, IPC_STAT, &attr));
+
+ EXPECT_LT(attr.shm_atime, pre_detach);
+ EXPECT_GE(attr.shm_dtime, pre_detach);
+ EXPECT_LT(attr.shm_ctime, pre_detach);
+ EXPECT_EQ(attr.shm_lpid, pid);
+ EXPECT_EQ(attr.shm_nattch, 0);
+}
+
+TEST(ShmTest, ShmStat) {
+ // This test relies on the segment we create to be the first one on the
+ // system, causing it to occupy slot 1. We can't reasonably expect this on a
+ // general Linux host.
+ SKIP_IF(!IsRunningOnGvisor());
+
+ ASSERT_NO_ERRNO(Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777));
+ struct shmid_ds attr;
+ ASSERT_NO_ERRNO(Shmctl(1, SHM_STAT, &attr));
+ // This does the same thing as IPC_STAT, so only test that the syscall
+ // succeeds here.
+}
+
+TEST(ShmTest, IpcInfo) {
+ struct shminfo info;
+ ASSERT_NO_ERRNO(Shmctl(0, IPC_INFO, &info));
+
+ EXPECT_EQ(info.shmmin, 1); // This is always 1, according to the man page.
+ EXPECT_GT(info.shmmax, info.shmmin);
+ EXPECT_GT(info.shmmni, 0);
+ EXPECT_GT(info.shmseg, 0);
+ EXPECT_GT(info.shmall, 0);
+}
+
+TEST(ShmTest, ShmInfo) {
+ struct shm_info info;
+
+ // We generally can't know what other processes on a linux machine
+ // does with shared memory segments, so we can't test specific
+ // numbers on Linux. When running under gvisor, we're guaranteed to
+ // be the only ones using shm, so we can easily verify machine-wide
+ // numbers.
+ if (IsRunningOnGvisor()) {
+ ASSERT_NO_ERRNO(Shmctl(0, SHM_INFO, &info));
+ EXPECT_EQ(info.used_ids, 0);
+ EXPECT_EQ(info.shm_tot, 0);
+ EXPECT_EQ(info.shm_rss, 0);
+ EXPECT_EQ(info.shm_swp, 0);
+ }
+
+ const int id = ASSERT_NO_ERRNO_AND_VALUE(
+ Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777));
+ const char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id, nullptr, 0));
+
+ ASSERT_NO_ERRNO(Shmctl(1, SHM_INFO, &info));
+
+ if (IsRunningOnGvisor()) {
+ ASSERT_NO_ERRNO(Shmctl(id, SHM_INFO, &info));
+ EXPECT_EQ(info.used_ids, 1);
+ EXPECT_EQ(info.shm_tot, kAllocSize / kPageSize);
+ EXPECT_EQ(info.shm_rss, kAllocSize / kPageSize);
+ EXPECT_EQ(info.shm_swp, 0); // Gvisor currently never swaps.
+ }
+
+ ASSERT_NO_ERRNO(Shmdt(addr));
+}
+
+TEST(ShmTest, ShmCtlSet) {
+ const int id = ASSERT_NO_ERRNO_AND_VALUE(
+ Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777));
+ const char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id, nullptr, 0));
+
+ struct shmid_ds attr;
+ ASSERT_NO_ERRNO(Shmctl(id, IPC_STAT, &attr));
+ ASSERT_EQ(attr.shm_perm.mode, 0777);
+
+ attr.shm_perm.mode = 0766;
+ ASSERT_NO_ERRNO(Shmctl(id, IPC_SET, &attr));
+
+ ASSERT_NO_ERRNO(Shmctl(id, IPC_STAT, &attr));
+ ASSERT_EQ(attr.shm_perm.mode, 0766);
+
+ ASSERT_NO_ERRNO(Shmdt(addr));
+}
+
+TEST(ShmTest, RemovedSegmentsAreMarkedDeleted) {
+ const int id = ASSERT_NO_ERRNO_AND_VALUE(
+ Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777));
+ const char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id, nullptr, 0));
+ ASSERT_NO_ERRNO(Shmctl<void>(id, IPC_RMID, nullptr));
+ struct shmid_ds attr;
+ ASSERT_NO_ERRNO(Shmctl(id, IPC_STAT, &attr));
+ EXPECT_NE(attr.shm_perm.mode & SHM_DEST, 0);
+ ASSERT_NO_ERRNO(Shmdt(addr));
+}
+
+TEST(ShmTest, RemovedSegmentsAreDestroyed) {
+ const int id = ASSERT_NO_ERRNO_AND_VALUE(
+ Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777));
+ const char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id, nullptr, 0));
+
+ const uint64_t alloc_pages = kAllocSize / kPageSize;
+
+ struct shm_info info;
+ ASSERT_NO_ERRNO(Shmctl(1, SHM_INFO, &info));
+ const uint64_t before = info.shm_tot;
+
+ ASSERT_NO_ERRNO(Shmctl<void>(id, IPC_RMID, nullptr));
+ ASSERT_NO_ERRNO(Shmdt(addr));
+
+ ASSERT_NO_ERRNO(Shmctl(1, SHM_INFO, &info));
+ const uint64_t after = info.shm_tot;
+ EXPECT_EQ(after, before - alloc_pages);
+}
+
+TEST(ShmTest, AllowsAttachToRemovedSegmentWithRefs) {
+ const int id = ASSERT_NO_ERRNO_AND_VALUE(
+ Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777));
+ const char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id, nullptr, 0));
+ ASSERT_NO_ERRNO(Shmctl<void>(id, IPC_RMID, nullptr));
+ const char* addr2 = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id, nullptr, 0));
+ ASSERT_NO_ERRNO(Shmdt(addr));
+ ASSERT_NO_ERRNO(Shmdt(addr2));
+}
+
+TEST(ShmTest, RemovedSegmentsAreNotDiscoverable) {
+ const TempPath keyfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ const key_t key = ftok(keyfile.path().c_str(), 1);
+ const int id =
+ ASSERT_NO_ERRNO_AND_VALUE(Shmget(key, kAllocSize, IPC_CREAT | 0777));
+ ASSERT_NO_ERRNO(Shmctl<void>(id, IPC_RMID, nullptr));
+ EXPECT_THAT(Shmget(key, kAllocSize, 0777), PosixErrorIs(ENOENT, _));
+}
+
+TEST(ShmDeathTest, ReadonlySegment) {
+ SetupGvisorDeathTest();
+ const int id = ASSERT_NO_ERRNO_AND_VALUE(
+ Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777));
+ char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id, nullptr, SHM_RDONLY));
+ // Reading succeeds.
+ static_cast<void>(addr[0]);
+ // Writing fails.
+ EXPECT_EXIT(addr[0] = 'x', ::testing::KilledBySignal(SIGSEGV), "");
+}
+
+TEST(ShmDeathTest, SegmentNotAccessibleAfterDetach) {
+ // This test is susceptible to races with concurrent mmaps running in parallel
+ // gtest threads since the test relies on the address freed during a shm
+ // segment destruction to remain unused. We run the test body in a forked
+ // child to guarantee a single-threaded context to avoid this.
+
+ SetupGvisorDeathTest();
+
+ const auto rest = [&] {
+ const int id = ASSERT_NO_ERRNO_AND_VALUE(
+ Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777));
+ char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id, nullptr, 0));
+ addr[0] = 'x';
+ ASSERT_NO_ERRNO(Shmdt(addr));
+
+ // This access should cause a SIGSEGV.
+ addr[0] = 'x';
+ };
+
+ EXPECT_THAT(InForkedProcess(rest),
+ IsPosixErrorOkAndHolds(W_EXITCODE(0, SIGSEGV)));
+}
+
+TEST(ShmTest, RequestingSegmentSmallerThanSHMMINFails) {
+ struct shminfo info;
+ ASSERT_NO_ERRNO(Shmctl(0, IPC_INFO, &info));
+ const uint64_t size = info.shmmin - 1;
+ EXPECT_THAT(Shmget(IPC_PRIVATE, size, IPC_CREAT | 0777),
+ PosixErrorIs(EINVAL, _));
+}
+
+TEST(ShmTest, RequestingSegmentLargerThanSHMMAXFails) {
+ struct shminfo info;
+ ASSERT_NO_ERRNO(Shmctl(0, IPC_INFO, &info));
+ const uint64_t size = info.shmmax + kPageSize;
+ EXPECT_THAT(Shmget(IPC_PRIVATE, size, IPC_CREAT | 0777),
+ PosixErrorIs(EINVAL, _));
+}
+
+TEST(ShmTest, RequestingUnalignedSizeSucceeds) {
+ EXPECT_NO_ERRNO(Shmget(IPC_PRIVATE, 4097, IPC_CREAT | 0777));
+}
+
+TEST(ShmTest, RequestingDuplicateCreationFails) {
+ const TempPath keyfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ const key_t key = ftok(keyfile.path().c_str(), 1);
+ ASSERT_NO_ERRNO_AND_VALUE(
+ Shmget(key, kAllocSize, IPC_CREAT | IPC_EXCL | 0777));
+ EXPECT_THAT(Shmget(key, kAllocSize, IPC_CREAT | IPC_EXCL | 0777),
+ PosixErrorIs(EEXIST, _));
+}
+
+TEST(ShmTest, SegmentsSizeFixedOnCreation) {
+ const TempPath keyfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ const key_t key = ftok(keyfile.path().c_str(), 1);
+
+ // Base segment.
+ const int id =
+ ASSERT_NO_ERRNO_AND_VALUE(Shmget(key, kAllocSize, IPC_CREAT | 0777));
+
+ // Ask for the same segment at half size. This succeeds.
+ const int id2 = ASSERT_NO_ERRNO_AND_VALUE(Shmget(key, kAllocSize / 2, 0777));
+
+ // Ask for the same segment at double size.
+ EXPECT_THAT(Shmget(key, kAllocSize * 2, 0777), PosixErrorIs(EINVAL, _));
+
+ char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id, nullptr, 0));
+ char* addr2 = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id2, nullptr, 0));
+
+ // We have 2 different maps...
+ EXPECT_NE(addr, addr2);
+
+ // ... And both maps are kAllocSize bytes; despite asking for a half-sized
+ // segment for the second map.
+ addr[kAllocSize - 1] = 'x';
+ addr2[kAllocSize - 1] = 'x';
+
+ ASSERT_NO_ERRNO(Shmdt(addr));
+ ASSERT_NO_ERRNO(Shmdt(addr2));
+}
+
+TEST(ShmTest, PartialUnmap) {
+ const int id = ASSERT_NO_ERRNO_AND_VALUE(
+ Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777));
+ char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id, nullptr, 0));
+ EXPECT_THAT(munmap(addr + (kAllocSize / 4), kAllocSize / 2),
+ SyscallSucceeds());
+ ASSERT_NO_ERRNO(Shmdt(addr));
+}
+
+// Check that sentry does not panic when asked for a zero-length private shm
+// segment.
+TEST(ShmTest, GracefullyFailOnZeroLenSegmentCreation) {
+ EXPECT_THAT(Shmget(IPC_PRIVATE, 0, 0), PosixErrorIs(EINVAL, _));
+}
+
+TEST(ShmTest, NoDestructionOfAttachedSegmentWithMultipleRmid) {
+ const int id = ASSERT_NO_ERRNO_AND_VALUE(
+ Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777));
+ char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id, nullptr, 0));
+ char* addr2 = ASSERT_NO_ERRNO_AND_VALUE(Shmat(id, nullptr, 0));
+
+ // There should be 2 refs to the segment from the 2 attachments, and a single
+ // self-reference. Mark the segment as destroyed more than 3 times through
+ // shmctl(RMID). If there's a bug with the ref counting, this should cause the
+ // count to drop to zero.
+ for (int i = 0; i < 6; ++i) {
+ ASSERT_NO_ERRNO(Shmctl<void>(id, IPC_RMID, nullptr));
+ }
+
+ // Segment should remain accessible.
+ addr[0] = 'x';
+ ASSERT_NO_ERRNO(Shmdt(addr));
+
+ // Segment should remain accessible even after one of the two attachments are
+ // detached.
+ addr2[0] = 'x';
+ ASSERT_NO_ERRNO(Shmdt(addr2));
+}
+
+} // namespace
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/sigaction.cc b/test/syscalls/linux/sigaction.cc
new file mode 100644
index 000000000..cdd2dbf31
--- /dev/null
+++ b/test/syscalls/linux/sigaction.cc
@@ -0,0 +1,70 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <signal.h>
+
+#include "gtest/gtest.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(SigactionTest, GetLessThanOrEqualToZeroFails) {
+ struct sigaction act;
+ memset(&act, 0, sizeof(act));
+ ASSERT_THAT(sigaction(-1, NULL, &act), SyscallFailsWithErrno(EINVAL));
+ ASSERT_THAT(sigaction(0, NULL, &act), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(SigactionTest, SetLessThanOrEqualToZeroFails) {
+ struct sigaction act;
+ memset(&act, 0, sizeof(act));
+ ASSERT_THAT(sigaction(0, &act, NULL), SyscallFailsWithErrno(EINVAL));
+ ASSERT_THAT(sigaction(0, &act, NULL), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(SigactionTest, GetGreaterThanMaxFails) {
+ struct sigaction act;
+ memset(&act, 0, sizeof(act));
+ ASSERT_THAT(sigaction(SIGRTMAX + 1, NULL, &act),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(SigactionTest, SetGreaterThanMaxFails) {
+ struct sigaction act;
+ memset(&act, 0, sizeof(act));
+ ASSERT_THAT(sigaction(SIGRTMAX + 1, &act, NULL),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(SigactionTest, SetSigkillFails) {
+ struct sigaction act;
+ memset(&act, 0, sizeof(act));
+ ASSERT_THAT(sigaction(SIGKILL, NULL, &act), SyscallSucceeds());
+ ASSERT_THAT(sigaction(SIGKILL, &act, NULL), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(SigactionTest, SetSigstopFails) {
+ struct sigaction act;
+ memset(&act, 0, sizeof(act));
+ ASSERT_THAT(sigaction(SIGSTOP, NULL, &act), SyscallSucceeds());
+ ASSERT_THAT(sigaction(SIGSTOP, &act, NULL), SyscallFailsWithErrno(EINVAL));
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/sigaltstack.cc b/test/syscalls/linux/sigaltstack.cc
new file mode 100644
index 000000000..fa991545c
--- /dev/null
+++ b/test/syscalls/linux/sigaltstack.cc
@@ -0,0 +1,274 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <functional>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "test/util/cleanup.h"
+#include "test/util/fs_util.h"
+#include "test/util/multiprocess_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+PosixErrorOr<Cleanup> ScopedSigaltstack(stack_t const& stack) {
+ stack_t old_stack;
+ int rc = sigaltstack(&stack, &old_stack);
+ MaybeSave();
+ if (rc < 0) {
+ return PosixError(errno, "sigaltstack failed");
+ }
+ return Cleanup([old_stack] {
+ EXPECT_THAT(sigaltstack(&old_stack, nullptr), SyscallSucceeds());
+ });
+}
+
+volatile bool got_signal = false;
+volatile int sigaltstack_errno = 0;
+volatile int ss_flags = 0;
+
+void sigaltstack_handler(int sig, siginfo_t* siginfo, void* arg) {
+ got_signal = true;
+
+ stack_t stack;
+ int ret = sigaltstack(nullptr, &stack);
+ MaybeSave();
+ if (ret < 0) {
+ sigaltstack_errno = errno;
+ return;
+ }
+ ss_flags = stack.ss_flags;
+}
+
+TEST(SigaltstackTest, Success) {
+ std::vector<char> stack_mem(SIGSTKSZ);
+ stack_t stack = {};
+ stack.ss_sp = stack_mem.data();
+ stack.ss_size = stack_mem.size();
+ auto const cleanup_sigstack =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaltstack(stack));
+
+ struct sigaction sa = {};
+ sa.sa_sigaction = sigaltstack_handler;
+ sigfillset(&sa.sa_mask);
+ sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
+ auto const cleanup_sa =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGUSR1, sa));
+
+ // Send signal to this thread, as sigaltstack is per-thread.
+ EXPECT_THAT(tgkill(getpid(), gettid(), SIGUSR1), SyscallSucceeds());
+
+ EXPECT_TRUE(got_signal);
+ EXPECT_EQ(sigaltstack_errno, 0);
+ EXPECT_NE(0, ss_flags & SS_ONSTACK);
+}
+
+TEST(SigaltstackTest, ResetByExecve) {
+ std::vector<char> stack_mem(SIGSTKSZ);
+ stack_t stack = {};
+ stack.ss_sp = stack_mem.data();
+ stack.ss_size = stack_mem.size();
+ auto const cleanup_sigstack =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaltstack(stack));
+
+ std::string full_path;
+ char* test_src = getenv("TEST_SRCDIR");
+ if (test_src) {
+ full_path = JoinPath(test_src, "../../linux/sigaltstack_check");
+ }
+ ASSERT_FALSE(full_path.empty());
+
+ pid_t child_pid = -1;
+ int execve_errno = 0;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+ ForkAndExec(full_path, {"sigaltstack_check"}, {}, nullptr, &child_pid,
+ &execve_errno));
+
+ ASSERT_GT(child_pid, 0);
+ ASSERT_EQ(execve_errno, 0);
+
+ int status = 0;
+ ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+}
+
+volatile bool badhandler_on_sigaltstack = true; // Set by the handler.
+char* volatile badhandler_low_water_mark = nullptr; // Set by the handler.
+volatile uint8_t badhandler_recursive_faults = 0; // Consumed by the handler.
+
+void badhandler(int sig, siginfo_t* siginfo, void* arg) {
+ char stack_var = 0;
+ char* current_ss = &stack_var;
+
+ stack_t stack;
+ int ret = sigaltstack(nullptr, &stack);
+ if (ret < 0 || (stack.ss_flags & SS_ONSTACK) != SS_ONSTACK) {
+ // We should always be marked as being on the stack. Don't allow this to hit
+ // the bottom if this is ever not true (the main test will fail as a
+ // result, but we still need to unwind the recursive faults).
+ badhandler_on_sigaltstack = false;
+ }
+ if (current_ss < badhandler_low_water_mark) {
+ // Record the low point for the signal stack. We never expected this to be
+ // before stack bottom, but this is asserted in the actual test.
+ badhandler_low_water_mark = current_ss;
+ }
+ if (badhandler_recursive_faults > 0) {
+ badhandler_recursive_faults--;
+ Fault();
+ }
+ FixupFault(reinterpret_cast<ucontext*>(arg));
+}
+
+TEST(SigaltstackTest, WalksOffBottom) {
+ // This test marks the upper half of the stack_mem array as the signal stack.
+ // It asserts that when a fault occurs in the handler (already on the signal
+ // stack), we eventually continue to fault our way off the stack. We should
+ // not revert to the top of the signal stack when we fall off the bottom and
+ // the signal stack should remain "in use". When we fall off the signal stack,
+ // we should have an unconditional signal delivered and not start using the
+ // first part of the stack_mem array.
+ std::vector<char> stack_mem(SIGSTKSZ * 2);
+ stack_t stack = {};
+ stack.ss_sp = stack_mem.data() + SIGSTKSZ; // See above: upper half.
+ stack.ss_size = SIGSTKSZ; // Only one half the array.
+ auto const cleanup_sigstack =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaltstack(stack));
+
+ // Setup the handler: this must be for SIGSEGV, and it must allow proper
+ // nesting (no signal mask, no defer) so that we can trigger multiple times.
+ //
+ // When we walk off the bottom of the signal stack and force signal delivery
+ // of a SIGSEGV, the handler will revert to the default behavior (kill).
+ struct sigaction sa = {};
+ sa.sa_sigaction = badhandler;
+ sa.sa_flags = SA_SIGINFO | SA_ONSTACK | SA_NODEFER;
+ auto const cleanup_sa =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGSEGV, sa));
+
+ // Trigger a single fault.
+ badhandler_low_water_mark =
+ reinterpret_cast<char*>(&stack.ss_sp) + SIGSTKSZ; // Expected top.
+ badhandler_recursive_faults = 0; // Disable refault.
+ Fault();
+ EXPECT_TRUE(badhandler_on_sigaltstack);
+ EXPECT_THAT(sigaltstack(nullptr, &stack), SyscallSucceeds());
+ EXPECT_EQ(stack.ss_flags & SS_ONSTACK, 0);
+ EXPECT_LT(badhandler_low_water_mark,
+ reinterpret_cast<char*>(stack.ss_sp) + 2 * SIGSTKSZ);
+ EXPECT_GT(badhandler_low_water_mark, reinterpret_cast<char*>(stack.ss_sp));
+
+ // Trigger two faults.
+ char* prev_low_water_mark = badhandler_low_water_mark; // Previous top.
+ badhandler_recursive_faults = 1; // One refault.
+ Fault();
+ ASSERT_TRUE(badhandler_on_sigaltstack);
+ EXPECT_THAT(sigaltstack(nullptr, &stack), SyscallSucceeds());
+ EXPECT_EQ(stack.ss_flags & SS_ONSTACK, 0);
+ EXPECT_LT(badhandler_low_water_mark, prev_low_water_mark);
+ EXPECT_GT(badhandler_low_water_mark, reinterpret_cast<char*>(stack.ss_sp));
+
+ // Calculate the stack growth for a fault, and set the recursive faults to
+ // ensure that the signal handler stack required exceeds our marked stack area
+ // by a minimal amount. It should remain in the valid stack_mem area so that
+ // we can test the signal is forced merely by going out of the signal stack
+ // bounds, not by a genuine fault.
+ uintptr_t frame_size =
+ static_cast<uintptr_t>(prev_low_water_mark - badhandler_low_water_mark);
+ badhandler_recursive_faults = (SIGSTKSZ + frame_size) / frame_size;
+ EXPECT_EXIT(Fault(), ::testing::KilledBySignal(SIGSEGV), "");
+}
+
+volatile int setonstack_retval = 0; // Set by the handler.
+volatile int setonstack_errno = 0; // Set by the handler.
+
+void setonstack(int sig, siginfo_t* siginfo, void* arg) {
+ char stack_mem[SIGSTKSZ];
+ stack_t stack = {};
+ stack.ss_sp = &stack_mem[0];
+ stack.ss_size = SIGSTKSZ;
+ setonstack_retval = sigaltstack(&stack, nullptr);
+ setonstack_errno = errno;
+ FixupFault(reinterpret_cast<ucontext*>(arg));
+}
+
+TEST(SigaltstackTest, SetWhileOnStack) {
+ // Reserve twice as much stack here, since the handler will allocate a vector
+ // of size SIGTKSZ and attempt to set the sigaltstack to that value.
+ std::vector<char> stack_mem(2 * SIGSTKSZ);
+ stack_t stack = {};
+ stack.ss_sp = stack_mem.data();
+ stack.ss_size = stack_mem.size();
+ auto const cleanup_sigstack =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaltstack(stack));
+
+ // See above.
+ struct sigaction sa = {};
+ sa.sa_sigaction = setonstack;
+ sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
+ auto const cleanup_sa =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGSEGV, sa));
+
+ // Trigger a fault.
+ Fault();
+
+ // The set should have failed.
+ EXPECT_EQ(setonstack_retval, -1);
+ EXPECT_EQ(setonstack_errno, EPERM);
+}
+
+TEST(SigaltstackTest, SetCurrentStack) {
+ // This is executed as an exit test because once the signal stack is set to
+ // the local stack, there's no good way to unwind. We don't want to taint the
+ // test of any other tests that might run within this process.
+ EXPECT_EXIT(
+ {
+ char stack_value = 0;
+ stack_t stack = {};
+ stack.ss_sp = &stack_value - kPageSize; // Lower than current level.
+ stack.ss_size = 2 * kPageSize; // => &stack_value +/- kPageSize.
+ TEST_CHECK(sigaltstack(&stack, nullptr) == 0);
+ TEST_CHECK(sigaltstack(nullptr, &stack) == 0);
+ TEST_CHECK((stack.ss_flags & SS_ONSTACK) != 0);
+
+ // Should not be able to change the stack (even no-op).
+ TEST_CHECK(sigaltstack(&stack, nullptr) == -1 && errno == EPERM);
+
+ // Should not be able to disable the stack.
+ stack.ss_flags = SS_DISABLE;
+ TEST_CHECK(sigaltstack(&stack, nullptr) == -1 && errno == EPERM);
+ exit(0);
+ },
+ ::testing::ExitedWithCode(0), "");
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/sigaltstack_check.cc b/test/syscalls/linux/sigaltstack_check.cc
new file mode 100644
index 000000000..b71f812a8
--- /dev/null
+++ b/test/syscalls/linux/sigaltstack_check.cc
@@ -0,0 +1,33 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Checks that there is no alternate signal stack by default.
+//
+// Used by a test in sigaltstack.cc.
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "test/util/logging.h"
+
+int main(int /* argc */, char** /* argv */) {
+ stack_t stack;
+ TEST_CHECK(sigaltstack(nullptr, &stack) >= 0);
+ TEST_CHECK(stack.ss_flags == SS_DISABLE);
+ TEST_CHECK(stack.ss_sp == 0);
+ TEST_CHECK(stack.ss_size == 0);
+ return 0;
+}
diff --git a/test/syscalls/linux/sigiret.cc b/test/syscalls/linux/sigiret.cc
new file mode 100644
index 000000000..1b7cecccb
--- /dev/null
+++ b/test/syscalls/linux/sigiret.cc
@@ -0,0 +1,137 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/ucontext.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "test/util/logging.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+#include "test/util/timer_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+constexpr uint64_t kOrigRcx = 0xdeadbeeffacefeed;
+constexpr uint64_t kOrigR11 = 0xfacefeedbaad1dea;
+
+volatile int gotvtalrm, ready;
+
+void sigvtalrm(int sig, siginfo_t* siginfo, void* _uc) {
+ ucontext_t* uc = reinterpret_cast<ucontext_t*>(_uc);
+
+ // Verify that:
+ // - test is in the busy-wait loop waiting for signal.
+ // - %rcx and %r11 values in mcontext_t match kOrigRcx and kOrigR11.
+ if (ready &&
+ static_cast<uint64_t>(uc->uc_mcontext.gregs[REG_RCX]) == kOrigRcx &&
+ static_cast<uint64_t>(uc->uc_mcontext.gregs[REG_R11]) == kOrigR11) {
+ // Modify the values %rcx and %r11 in the ucontext. These are the
+ // values seen by the application after the signal handler returns.
+ uc->uc_mcontext.gregs[REG_RCX] = ~kOrigRcx;
+ uc->uc_mcontext.gregs[REG_R11] = ~kOrigR11;
+ gotvtalrm = 1;
+ }
+}
+
+TEST(SigIretTest, CheckRcxR11) {
+ // Setup signal handler for SIGVTALRM.
+ struct sigaction sa = {};
+ sigfillset(&sa.sa_mask);
+ sa.sa_sigaction = sigvtalrm;
+ sa.sa_flags = SA_SIGINFO;
+ auto const action_cleanup =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGVTALRM, sa));
+
+ auto const mask_cleanup =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGVTALRM));
+
+ // Setup itimer to fire after 500 msecs.
+ struct itimerval itimer = {};
+ itimer.it_value.tv_usec = 500 * 1000; // 500 msecs.
+ auto const timer_cleanup =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedItimer(ITIMER_VIRTUAL, itimer));
+
+ // Initialize %rcx and %r11 and spin until the signal handler returns.
+ uint64_t rcx = kOrigRcx;
+ uint64_t r11 = kOrigR11;
+ asm volatile(
+ "movq %[rcx], %%rcx;" // %rcx = rcx
+ "movq %[r11], %%r11;" // %r11 = r11
+ "movl $1, %[ready];" // ready = 1
+ "1: pause; cmpl $0, %[gotvtalrm]; je 1b;" // while (!gotvtalrm);
+ "movq %%rcx, %[rcx];" // rcx = %rcx
+ "movq %%r11, %[r11];" // r11 = %r11
+ : [ready] "=m"(ready), [rcx] "+m"(rcx), [r11] "+m"(r11)
+ : [gotvtalrm] "m"(gotvtalrm)
+ : "cc", "memory", "rcx", "r11");
+
+ // If sigreturn(2) returns via 'sysret' then %rcx and %r11 will be
+ // clobbered and set to 'ptregs->rip' and 'ptregs->rflags' respectively.
+ //
+ // The following check verifies that %rcx and %r11 were not clobbered
+ // when returning from the signal handler (via sigreturn(2)).
+ EXPECT_EQ(rcx, ~kOrigRcx);
+ EXPECT_EQ(r11, ~kOrigR11);
+}
+
+constexpr uint64_t kNonCanonicalRip = 0xCCCC000000000000;
+
+// Test that a non-canonical signal handler faults as expected.
+TEST(SigIretTest, BadHandler) {
+ struct sigaction sa = {};
+ sa.sa_sigaction =
+ reinterpret_cast<void (*)(int, siginfo_t*, void*)>(kNonCanonicalRip);
+ auto const cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGUSR1, sa));
+
+ pid_t pid = fork();
+ if (pid == 0) {
+ // Child, wait for signal.
+ while (1) {
+ pause();
+ }
+ }
+ ASSERT_THAT(pid, SyscallSucceeds());
+
+ EXPECT_THAT(kill(pid, SIGUSR1), SyscallSucceeds());
+
+ int status;
+ EXPECT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+ EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGSEGV)
+ << "status = " << status;
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
+
+int main(int argc, char** argv) {
+ // SigIretTest.CheckRcxR11 depends on delivering SIGVTALRM to the main thread.
+ // Block SIGVTALRM so that any other threads created by TestInit will also
+ // have SIGVTALRM blocked.
+ sigset_t set;
+ sigemptyset(&set);
+ sigaddset(&set, SIGVTALRM);
+ TEST_PCHECK(sigprocmask(SIG_BLOCK, &set, nullptr) == 0);
+
+ gvisor::testing::TestInit(&argc, &argv);
+
+ return RUN_ALL_TESTS();
+}
diff --git a/test/syscalls/linux/sigprocmask.cc b/test/syscalls/linux/sigprocmask.cc
new file mode 100644
index 000000000..d8b918446
--- /dev/null
+++ b/test/syscalls/linux/sigprocmask.cc
@@ -0,0 +1,272 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <signal.h>
+#include <stddef.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Signals numbers used for testing.
+static constexpr int kTestSignal1 = SIGUSR1;
+static constexpr int kTestSignal2 = SIGUSR2;
+
+static int raw_sigprocmask(int how, const sigset_t* set, sigset_t* oldset) {
+ return syscall(SYS_rt_sigprocmask, how, set, oldset, _NSIG / 8);
+}
+
+// count of the number of signals received
+int signal_count[kMaxSignal + 1];
+
+// signal handler increments the signal counter
+void SigHandler(int sig, siginfo_t* info, void* context) {
+ TEST_CHECK(sig > 0 && sig <= kMaxSignal);
+ signal_count[sig] += 1;
+}
+
+// The test fixture saves and restores the signal mask and
+// sets up handlers for kTestSignal1 and kTestSignal2.
+class SigProcMaskTest : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ // Save the current signal mask.
+ EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, &mask_),
+ SyscallSucceeds());
+
+ // Setup signal handlers for kTestSignal1 and kTestSignal2.
+ struct sigaction sa;
+ sa.sa_sigaction = SigHandler;
+ sigfillset(&sa.sa_mask);
+ sa.sa_flags = SA_SIGINFO;
+ EXPECT_THAT(sigaction(kTestSignal1, &sa, &sa_test_sig_1_),
+ SyscallSucceeds());
+ EXPECT_THAT(sigaction(kTestSignal2, &sa, &sa_test_sig_2_),
+ SyscallSucceeds());
+
+ // Clear the signal counters.
+ memset(signal_count, 0, sizeof(signal_count));
+ }
+
+ void TearDown() override {
+ // Restore the signal mask.
+ EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, &mask_, nullptr),
+ SyscallSucceeds());
+
+ // Restore the signal handlers for kTestSignal1 and kTestSignal2.
+ EXPECT_THAT(sigaction(kTestSignal1, &sa_test_sig_1_, nullptr),
+ SyscallSucceeds());
+ EXPECT_THAT(sigaction(kTestSignal2, &sa_test_sig_2_, nullptr),
+ SyscallSucceeds());
+ }
+
+ private:
+ sigset_t mask_;
+ struct sigaction sa_test_sig_1_;
+ struct sigaction sa_test_sig_2_;
+};
+
+// Both sigsets nullptr should succeed and do nothing.
+TEST_F(SigProcMaskTest, NullAddress) {
+ EXPECT_THAT(raw_sigprocmask(SIG_BLOCK, nullptr, NULL), SyscallSucceeds());
+ EXPECT_THAT(raw_sigprocmask(SIG_UNBLOCK, nullptr, NULL), SyscallSucceeds());
+ EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, NULL), SyscallSucceeds());
+}
+
+// Bad address for either sigset should fail with EFAULT.
+TEST_F(SigProcMaskTest, BadAddress) {
+ sigset_t* bad_addr = reinterpret_cast<sigset_t*>(-1);
+
+ EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, bad_addr, nullptr),
+ SyscallFailsWithErrno(EFAULT));
+
+ EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, bad_addr),
+ SyscallFailsWithErrno(EFAULT));
+}
+
+// Bad value of the "how" parameter should fail with EINVAL.
+TEST_F(SigProcMaskTest, BadParameter) {
+ int bad_param_1 = -1;
+ int bad_param_2 = 42;
+
+ sigset_t set1;
+ sigemptyset(&set1);
+
+ EXPECT_THAT(raw_sigprocmask(bad_param_1, &set1, nullptr),
+ SyscallFailsWithErrno(EINVAL));
+
+ EXPECT_THAT(raw_sigprocmask(bad_param_2, &set1, nullptr),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+// Check that we can get the current signal mask.
+TEST_F(SigProcMaskTest, GetMask) {
+ sigset_t set1;
+ sigset_t set2;
+
+ sigemptyset(&set1);
+ sigfillset(&set2);
+ EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, &set1), SyscallSucceeds());
+ EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, &set2), SyscallSucceeds());
+ EXPECT_THAT(set1, EqualsSigset(set2));
+}
+
+// Check that we can set the signal mask.
+TEST_F(SigProcMaskTest, SetMask) {
+ sigset_t actual;
+ sigset_t expected;
+
+ // Try to mask all signals
+ sigfillset(&expected);
+ EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, &expected, nullptr),
+ SyscallSucceeds());
+ EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, &actual),
+ SyscallSucceeds());
+ // sigprocmask() should have silently ignored SIGKILL and SIGSTOP.
+ sigdelset(&expected, SIGSTOP);
+ sigdelset(&expected, SIGKILL);
+ EXPECT_THAT(actual, EqualsSigset(expected));
+
+ // Try to clear the signal mask
+ sigemptyset(&expected);
+ EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, &expected, nullptr),
+ SyscallSucceeds());
+ EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, &actual),
+ SyscallSucceeds());
+ EXPECT_THAT(actual, EqualsSigset(expected));
+
+ // Try to set a mask with one signal.
+ sigemptyset(&expected);
+ sigaddset(&expected, kTestSignal1);
+ EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, &expected, nullptr),
+ SyscallSucceeds());
+ EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, &actual),
+ SyscallSucceeds());
+ EXPECT_THAT(actual, EqualsSigset(expected));
+}
+
+// Check that we can add and remove signals.
+TEST_F(SigProcMaskTest, BlockUnblock) {
+ sigset_t actual;
+ sigset_t expected;
+
+ // Try to set a mask with one signal.
+ sigemptyset(&expected);
+ sigaddset(&expected, kTestSignal1);
+ EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, &expected, nullptr),
+ SyscallSucceeds());
+ EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, &actual),
+ SyscallSucceeds());
+ EXPECT_THAT(actual, EqualsSigset(expected));
+
+ // Try to add another signal.
+ sigset_t block;
+ sigemptyset(&block);
+ sigaddset(&block, kTestSignal2);
+ EXPECT_THAT(raw_sigprocmask(SIG_BLOCK, &block, nullptr), SyscallSucceeds());
+ EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, &actual),
+ SyscallSucceeds());
+ sigaddset(&expected, kTestSignal2);
+ EXPECT_THAT(actual, EqualsSigset(expected));
+
+ // Try to remove a signal.
+ sigset_t unblock;
+ sigemptyset(&unblock);
+ sigaddset(&unblock, kTestSignal1);
+ EXPECT_THAT(raw_sigprocmask(SIG_UNBLOCK, &unblock, nullptr),
+ SyscallSucceeds());
+ EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, nullptr, &actual),
+ SyscallSucceeds());
+ sigdelset(&expected, kTestSignal1);
+ EXPECT_THAT(actual, EqualsSigset(expected));
+}
+
+// Test that the signal mask actually blocks signals.
+TEST_F(SigProcMaskTest, SignalHandler) {
+ sigset_t mask;
+
+ // clear the signal mask
+ sigemptyset(&mask);
+ EXPECT_THAT(raw_sigprocmask(SIG_SETMASK, &mask, nullptr), SyscallSucceeds());
+
+ // Check the initial signal counts.
+ EXPECT_EQ(0, signal_count[kTestSignal1]);
+ EXPECT_EQ(0, signal_count[kTestSignal2]);
+
+ // Check that both kTestSignal1 and kTestSignal2 are not blocked.
+ raise(kTestSignal1);
+ raise(kTestSignal2);
+ EXPECT_EQ(1, signal_count[kTestSignal1]);
+ EXPECT_EQ(1, signal_count[kTestSignal2]);
+
+ // Block kTestSignal1.
+ sigaddset(&mask, kTestSignal1);
+ EXPECT_THAT(raw_sigprocmask(SIG_BLOCK, &mask, nullptr), SyscallSucceeds());
+
+ // Check that kTestSignal1 is blocked.
+ raise(kTestSignal1);
+ raise(kTestSignal2);
+ EXPECT_EQ(1, signal_count[kTestSignal1]);
+ EXPECT_EQ(2, signal_count[kTestSignal2]);
+
+ // Unblock kTestSignal1.
+ sigaddset(&mask, kTestSignal1);
+ EXPECT_THAT(raw_sigprocmask(SIG_UNBLOCK, &mask, nullptr), SyscallSucceeds());
+
+ // Check that the unblocked kTestSignal1 has been delivered.
+ // TODO: gvisor currently drops masked signals on the floor.
+ if (!IsRunningOnGvisor()) {
+ EXPECT_EQ(2, signal_count[kTestSignal1]);
+ }
+ EXPECT_EQ(2, signal_count[kTestSignal2]);
+}
+
+// Check that sigprocmask correctly handles aliasing of the set and oldset
+// pointers.
+TEST_F(SigProcMaskTest, AliasedSets) {
+ sigset_t mask;
+
+ // Set a mask in which only kTestSignal1 is blocked.
+ sigset_t mask1;
+ sigemptyset(&mask1);
+ sigaddset(&mask1, kTestSignal1);
+ mask = mask1;
+ ASSERT_THAT(raw_sigprocmask(SIG_SETMASK, &mask, nullptr), SyscallSucceeds());
+
+ // Exchange it with a mask in which only kTestSignal2 is blocked.
+ sigset_t mask2;
+ sigemptyset(&mask2);
+ sigaddset(&mask2, kTestSignal2);
+ mask = mask2;
+ ASSERT_THAT(raw_sigprocmask(SIG_SETMASK, &mask, &mask), SyscallSucceeds());
+
+ // Check that the exchange succeeeded:
+ // mask should now contain the previously-set mask blocking only kTestSignal1.
+ EXPECT_THAT(mask, EqualsSigset(mask1));
+ // The current mask should block only kTestSignal2.
+ ASSERT_THAT(raw_sigprocmask(0, nullptr, &mask), SyscallSucceeds());
+ EXPECT_THAT(mask, EqualsSigset(mask2));
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/sigstop.cc b/test/syscalls/linux/sigstop.cc
new file mode 100644
index 000000000..e21d23d51
--- /dev/null
+++ b/test/syscalls/linux/sigstop.cc
@@ -0,0 +1,150 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <signal.h>
+#include <stdlib.h>
+#include <sys/select.h>
+
+#include "gtest/gtest.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/multiprocess_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+DEFINE_bool(sigstop_test_child, false,
+ "If true, run the SigstopTest child workload.");
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+constexpr absl::Duration kChildStartupDelay = absl::Seconds(5);
+constexpr absl::Duration kChildMainThreadDelay = absl::Seconds(10);
+constexpr absl::Duration kChildExtraThreadDelay = absl::Seconds(15);
+constexpr absl::Duration kPostSIGSTOPDelay = absl::Seconds(20);
+
+// Comparisons on absl::Duration aren't yet constexpr (2017-07-14), so we
+// can't just use static_assert.
+TEST(SigstopTest, TimesAreRelativelyConsistent) {
+ EXPECT_LT(kChildStartupDelay, kChildMainThreadDelay)
+ << "Child process will exit before the parent process attempts to stop "
+ "it";
+ EXPECT_LT(kChildMainThreadDelay, kChildExtraThreadDelay)
+ << "Secondary thread in child process will exit before main thread, "
+ "causing it to exit with the wrong code";
+ EXPECT_LT(kChildExtraThreadDelay, kPostSIGSTOPDelay)
+ << "Parent process stops waiting before child process may exit if "
+ "improperly stopped, rendering the test ineffective";
+}
+
+// Exit codes communicated from the child workload to the parent test process.
+constexpr int kChildMainThreadExitCode = 10;
+constexpr int kChildExtraThreadExitCode = 11;
+
+TEST(SigstopTest, Correctness) {
+ pid_t child_pid = -1;
+ int execve_errno = 0;
+ auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(
+ ForkAndExec("/proc/self/exe", {"/proc/self/exe", "--sigstop_test_child"},
+ {}, nullptr, &child_pid, &execve_errno));
+
+ ASSERT_GT(child_pid, 0);
+ ASSERT_EQ(execve_errno, 0);
+
+ // Wait for the child subprocess to start the second thread before stopping
+ // it.
+ absl::SleepFor(kChildStartupDelay);
+ ASSERT_THAT(kill(child_pid, SIGSTOP), SyscallSucceeds());
+ int status;
+ EXPECT_THAT(RetryEINTR(waitpid)(child_pid, &status, WUNTRACED),
+ SyscallSucceedsWithValue(child_pid));
+ EXPECT_TRUE(WIFSTOPPED(status));
+ EXPECT_EQ(SIGSTOP, WSTOPSIG(status));
+
+ // Sleep for longer than either of the sleeps in the child subprocess,
+ // expecting the child to stay alive because it's stopped.
+ absl::SleepFor(kPostSIGSTOPDelay);
+ ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, WNOHANG),
+ SyscallSucceedsWithValue(0));
+
+ // Resume the child.
+ ASSERT_THAT(kill(child_pid, SIGCONT), SyscallSucceeds());
+
+ EXPECT_THAT(RetryEINTR(waitpid)(child_pid, &status, WCONTINUED),
+ SyscallSucceedsWithValue(child_pid));
+ EXPECT_TRUE(WIFCONTINUED(status));
+
+ // Expect it to die.
+ ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds());
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), kChildMainThreadExitCode);
+}
+
+// Like base:SleepFor, but tries to avoid counting time spent stopped due to a
+// stop signal toward the sleep.
+//
+// This is required due to an inconsistency in how nanosleep(2) and stop signals
+// interact on Linux. When nanosleep is interrupted, it writes the remaining
+// time back to its second timespec argument, so that if nanosleep is
+// interrupted by a signal handler then userspace can immediately call nanosleep
+// again with that timespec. However, if nanosleep is automatically restarted
+// (because it's interrupted by a signal that is not delivered to a handler,
+// such as a stop signal), it's restarted based on the timer's former *absolute*
+// expiration time (via ERESTART_RESTARTBLOCK => SYS_restart_syscall =>
+// hrtimer_nanosleep_restart). This means that time spent stopped is effectively
+// counted as time spent sleeping, resulting in less time spent sleeping than
+// expected.
+//
+// Dividing the sleep into multiple smaller sleeps limits the impact of this
+// effect to the length of each sleep during which a stop occurs; for example,
+// if a sleeping process is only stopped once, SleepIgnoreStopped can
+// under-sleep by at most 100ms.
+void SleepIgnoreStopped(absl::Duration d) {
+ absl::Duration const max_sleep = absl::Milliseconds(100);
+ while (d > absl::ZeroDuration()) {
+ absl::Duration to_sleep = std::min(d, max_sleep);
+ absl::SleepFor(to_sleep);
+ d -= to_sleep;
+ }
+}
+
+void RunChild() {
+ // Start another thread that attempts to call exit_group with a different
+ // error code, in order to verify that SIGSTOP stops this thread as well.
+ ScopedThread t([] {
+ SleepIgnoreStopped(kChildExtraThreadDelay);
+ exit(kChildExtraThreadExitCode);
+ });
+ SleepIgnoreStopped(kChildMainThreadDelay);
+ exit(kChildMainThreadExitCode);
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
+
+int main(int argc, char** argv) {
+ gvisor::testing::TestInit(&argc, &argv);
+
+ if (FLAGS_sigstop_test_child) {
+ gvisor::testing::RunChild();
+ return 1;
+ }
+
+ return RUN_ALL_TESTS();
+}
diff --git a/test/syscalls/linux/sigtimedwait.cc b/test/syscalls/linux/sigtimedwait.cc
new file mode 100644
index 000000000..3a350fc28
--- /dev/null
+++ b/test/syscalls/linux/sigtimedwait.cc
@@ -0,0 +1,248 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/logging.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+#include "test/util/timer_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// N.B. main() blocks SIGALRM and SIGCHLD on all threads.
+
+constexpr int kAlarmSecs = 12;
+
+void NoopHandler(int sig, siginfo_t* info, void* context) {}
+
+TEST(SigtimedwaitTest, InvalidTimeout) {
+ sigset_t mask;
+ sigemptyset(&mask);
+ struct timespec timeout = {0, 1000000001};
+ EXPECT_THAT(sigtimedwait(&mask, nullptr, &timeout),
+ SyscallFailsWithErrno(EINVAL));
+ timeout = {-1, 0};
+ EXPECT_THAT(sigtimedwait(&mask, nullptr, &timeout),
+ SyscallFailsWithErrno(EINVAL));
+ timeout = {0, -1};
+ EXPECT_THAT(sigtimedwait(&mask, nullptr, &timeout),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+// No random save as the test relies on alarm timing. Cooperative save tests
+// already cover the save between alarm and wait.
+TEST(SigtimedwaitTest, AlarmReturnsAlarm_NoRandomSave) {
+ struct itimerval itv = {};
+ itv.it_value.tv_sec = kAlarmSecs;
+ const auto itimer_cleanup =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedItimer(ITIMER_REAL, itv));
+
+ sigset_t mask;
+ sigemptyset(&mask);
+ sigaddset(&mask, SIGALRM);
+ siginfo_t info = {};
+ EXPECT_THAT(RetryEINTR(sigtimedwait)(&mask, &info, nullptr),
+ SyscallSucceedsWithValue(SIGALRM));
+ EXPECT_EQ(SIGALRM, info.si_signo);
+}
+
+// No random save as the test relies on alarm timing. Cooperative save tests
+// already cover the save between alarm and wait.
+TEST(SigtimedwaitTest, NullTimeoutReturnsEINTR_NoRandomSave) {
+ struct sigaction sa;
+ sa.sa_sigaction = NoopHandler;
+ sigfillset(&sa.sa_mask);
+ sa.sa_flags = SA_SIGINFO;
+ const auto action_cleanup =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGALRM, sa));
+
+ const auto mask_cleanup =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGALRM));
+
+ struct itimerval itv = {};
+ itv.it_value.tv_sec = kAlarmSecs;
+ const auto itimer_cleanup =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedItimer(ITIMER_REAL, itv));
+
+ sigset_t mask;
+ sigemptyset(&mask);
+ EXPECT_THAT(sigtimedwait(&mask, nullptr, nullptr),
+ SyscallFailsWithErrno(EINTR));
+}
+
+TEST(SigtimedwaitTest, LegitTimeoutReturnsEAGAIN) {
+ sigset_t mask;
+ sigemptyset(&mask);
+ struct timespec timeout = {1, 0}; // 1 second
+ EXPECT_THAT(RetryEINTR(sigtimedwait)(&mask, nullptr, &timeout),
+ SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST(SigtimedwaitTest, ZeroTimeoutReturnsEAGAIN) {
+ sigset_t mask;
+ sigemptyset(&mask);
+ struct timespec timeout = {0, 0}; // 0 second
+ EXPECT_THAT(sigtimedwait(&mask, nullptr, &timeout),
+ SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST(SigtimedwaitTest, KillGeneratedSIGCHLD) {
+ EXPECT_THAT(kill(getpid(), SIGCHLD), SyscallSucceeds());
+
+ sigset_t mask;
+ sigemptyset(&mask);
+ sigaddset(&mask, SIGCHLD);
+ struct timespec ts = {5, 0};
+ EXPECT_THAT(RetryEINTR(sigtimedwait)(&mask, nullptr, &ts),
+ SyscallSucceedsWithValue(SIGCHLD));
+}
+
+TEST(SigtimedwaitTest, ChildExitGeneratedSIGCHLD) {
+ pid_t pid = fork();
+ if (pid == 0) {
+ _exit(0);
+ }
+ ASSERT_THAT(pid, SyscallSucceeds());
+
+ int status;
+ EXPECT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) << status;
+
+ sigset_t mask;
+ sigemptyset(&mask);
+ sigaddset(&mask, SIGCHLD);
+ struct timespec ts = {5, 0};
+ EXPECT_THAT(RetryEINTR(sigtimedwait)(&mask, nullptr, &ts),
+ SyscallSucceedsWithValue(SIGCHLD));
+}
+
+TEST(SigtimedwaitTest, ChildExitGeneratedSIGCHLDWithHandler) {
+ // Setup handler for SIGCHLD, but don't unblock it.
+ struct sigaction sa;
+ sa.sa_sigaction = NoopHandler;
+ sigfillset(&sa.sa_mask);
+ sa.sa_flags = SA_SIGINFO;
+ const auto action_cleanup =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGCHLD, sa));
+
+ pid_t pid = fork();
+ if (pid == 0) {
+ _exit(0);
+ }
+ ASSERT_THAT(pid, SyscallSucceeds());
+
+ sigset_t mask;
+ sigemptyset(&mask);
+ sigaddset(&mask, SIGCHLD);
+ struct timespec ts = {5, 0};
+ EXPECT_THAT(RetryEINTR(sigtimedwait)(&mask, nullptr, &ts),
+ SyscallSucceedsWithValue(SIGCHLD));
+
+ int status;
+ EXPECT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) << status;
+}
+
+TEST(SigtimedwaitTest, IgnoredUnmaskedSignal) {
+ constexpr int kSigno = SIGUSR1;
+ constexpr auto kSigtimedwaitSetupTime = absl::Seconds(2);
+ constexpr auto kSigtimedwaitTimeout = absl::Seconds(5);
+ ASSERT_GT(kSigtimedwaitTimeout, kSigtimedwaitSetupTime);
+
+ // Ensure that kSigno is ignored, and unmasked on this thread.
+ struct sigaction sa = {};
+ sa.sa_handler = SIG_IGN;
+ const auto scoped_sigaction =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(kSigno, sa));
+ sigset_t mask;
+ sigemptyset(&mask);
+ sigaddset(&mask, kSigno);
+ auto scoped_sigmask =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, mask));
+
+ // Create a thread which will send us kSigno while we are blocked in
+ // sigtimedwait.
+ pid_t tid = gettid();
+ ScopedThread sigthread([&] {
+ absl::SleepFor(kSigtimedwaitSetupTime);
+ EXPECT_THAT(tgkill(getpid(), tid, kSigno), SyscallSucceeds());
+ });
+
+ // sigtimedwait should not observe kSigno since it is ignored and already
+ // unmasked, causing it to be dropped before it is enqueued.
+ struct timespec timeout_ts = absl::ToTimespec(kSigtimedwaitTimeout);
+ EXPECT_THAT(RetryEINTR(sigtimedwait)(&mask, nullptr, &timeout_ts),
+ SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST(SigtimedwaitTest, IgnoredMaskedSignal) {
+ constexpr int kSigno = SIGUSR1;
+ constexpr auto kSigtimedwaitSetupTime = absl::Seconds(2);
+ constexpr auto kSigtimedwaitTimeout = absl::Seconds(5);
+ ASSERT_GT(kSigtimedwaitTimeout, kSigtimedwaitSetupTime);
+
+ // Ensure that kSigno is ignored, and masked on this thread.
+ struct sigaction sa = {};
+ sa.sa_handler = SIG_IGN;
+ const auto scoped_sigaction =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(kSigno, sa));
+ sigset_t mask;
+ sigemptyset(&mask);
+ sigaddset(&mask, kSigno);
+ auto scoped_sigmask =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, mask));
+
+ // Create a thread which will send us kSigno while we are blocked in
+ // sigtimedwait.
+ pid_t tid = gettid();
+ ScopedThread sigthread([&] {
+ absl::SleepFor(kSigtimedwaitSetupTime);
+ EXPECT_THAT(tgkill(getpid(), tid, kSigno), SyscallSucceeds());
+ });
+
+ // sigtimedwait should observe kSigno since it is normally masked, causing it
+ // to be enqueued despite being ignored.
+ struct timespec timeout_ts = absl::ToTimespec(kSigtimedwaitTimeout);
+ EXPECT_THAT(RetryEINTR(sigtimedwait)(&mask, nullptr, &timeout_ts),
+ SyscallSucceedsWithValue(kSigno));
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
+
+int main(int argc, char** argv) {
+ // These tests depend on delivering SIGALRM/SIGCHLD to the main thread or in
+ // sigtimedwait. Block them so that any other threads created by TestInit will
+ // also have them blocked.
+ sigset_t set;
+ sigemptyset(&set);
+ sigaddset(&set, SIGALRM);
+ sigaddset(&set, SIGCHLD);
+ TEST_PCHECK(sigprocmask(SIG_BLOCK, &set, nullptr) == 0);
+
+ gvisor::testing::TestInit(&argc, &argv);
+
+ return RUN_ALL_TESTS();
+}
diff --git a/test/syscalls/linux/socket_abstract.cc b/test/syscalls/linux/socket_abstract.cc
new file mode 100644
index 000000000..7b111a2dd
--- /dev/null
+++ b/test/syscalls/linux/socket_abstract.cc
@@ -0,0 +1,43 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "test/syscalls/linux/socket_generic.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/socket_unix.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+ return VecCat<SocketPairKind>(ApplyVec<SocketPairKind>(
+ AbstractBoundUnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET},
+ List<int>{0, SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC})));
+}
+
+INSTANTIATE_TEST_CASE_P(
+ AllUnixDomainSockets, AllSocketPairTest,
+ ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+INSTANTIATE_TEST_CASE_P(
+ AllUnixDomainSockets, UnixSocketPairTest,
+ ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_filesystem.cc b/test/syscalls/linux/socket_filesystem.cc
new file mode 100644
index 000000000..eea6f2810
--- /dev/null
+++ b/test/syscalls/linux/socket_filesystem.cc
@@ -0,0 +1,43 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "test/syscalls/linux/socket_generic.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/socket_unix.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+ return VecCat<SocketPairKind>(ApplyVec<SocketPairKind>(
+ FilesystemBoundUnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET},
+ List<int>{0, SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC})));
+}
+
+INSTANTIATE_TEST_CASE_P(
+ AllUnixDomainSockets, AllSocketPairTest,
+ ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+INSTANTIATE_TEST_CASE_P(
+ AllUnixDomainSockets, UnixSocketPairTest,
+ ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_generic.cc b/test/syscalls/linux/socket_generic.cc
new file mode 100644
index 000000000..fbc3bebed
--- /dev/null
+++ b/test/syscalls/linux/socket_generic.cc
@@ -0,0 +1,403 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_generic.h"
+
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "absl/strings/string_view.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+// This file is a generic socket test file. It must be built with another file
+// that provides the test types.
+
+namespace gvisor {
+namespace testing {
+
+TEST_P(AllSocketPairTest, BasicReadWrite) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ char buf[20];
+ const std::string data = "abc";
+ ASSERT_THAT(WriteFd(sockets->first_fd(), data.c_str(), 3),
+ SyscallSucceedsWithValue(3));
+ ASSERT_THAT(ReadFd(sockets->second_fd(), buf, 3),
+ SyscallSucceedsWithValue(3));
+ EXPECT_EQ(data, absl::string_view(buf, 3));
+}
+
+TEST_P(AllSocketPairTest, BasicSendRecv) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ char sent_data[512];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+ ASSERT_THAT(
+ RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0),
+ SyscallSucceedsWithValue(sizeof(sent_data)));
+ char received_data[sizeof(sent_data)];
+ ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+ sizeof(received_data), 0),
+ SyscallSucceedsWithValue(sizeof(received_data)));
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+}
+
+TEST_P(AllSocketPairTest, BasicSendmmsg) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ char sent_data[200];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ std::vector<struct mmsghdr> msgs(10);
+ std::vector<struct iovec> iovs(msgs.size());
+ const int chunk_size = sizeof(sent_data) / msgs.size();
+ for (size_t i = 0; i < msgs.size(); i++) {
+ iovs[i].iov_len = chunk_size;
+ iovs[i].iov_base = &sent_data[i * chunk_size];
+ msgs[i].msg_hdr.msg_iov = &iovs[i];
+ msgs[i].msg_hdr.msg_iovlen = 1;
+ }
+
+ ASSERT_THAT(
+ RetryEINTR(sendmmsg)(sockets->first_fd(), &msgs[0], msgs.size(), 0),
+ SyscallSucceedsWithValue(msgs.size()));
+
+ for (const struct mmsghdr& msg : msgs) {
+ EXPECT_EQ(chunk_size, msg.msg_len);
+ }
+
+ char received_data[sizeof(sent_data)];
+ for (size_t i = 0; i < msgs.size(); i++) {
+ ASSERT_THAT(ReadFd(sockets->second_fd(), &received_data[i * chunk_size],
+ chunk_size),
+ SyscallSucceedsWithValue(chunk_size));
+ }
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+}
+
+TEST_P(AllSocketPairTest, BasicRecvmmsg) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ char sent_data[200];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ char received_data[sizeof(sent_data)];
+ std::vector<struct mmsghdr> msgs(10);
+ std::vector<struct iovec> iovs(msgs.size());
+ const int chunk_size = sizeof(sent_data) / msgs.size();
+ for (size_t i = 0; i < msgs.size(); i++) {
+ iovs[i].iov_len = chunk_size;
+ iovs[i].iov_base = &received_data[i * chunk_size];
+ msgs[i].msg_hdr.msg_iov = &iovs[i];
+ msgs[i].msg_hdr.msg_iovlen = 1;
+ }
+
+ for (size_t i = 0; i < msgs.size(); i++) {
+ ASSERT_THAT(
+ WriteFd(sockets->first_fd(), &sent_data[i * chunk_size], chunk_size),
+ SyscallSucceedsWithValue(chunk_size));
+ }
+
+ ASSERT_THAT(RetryEINTR(recvmmsg)(sockets->second_fd(), &msgs[0], msgs.size(),
+ 0, nullptr),
+ SyscallSucceedsWithValue(msgs.size()));
+
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+ for (const struct mmsghdr& msg : msgs) {
+ EXPECT_EQ(chunk_size, msg.msg_len);
+ }
+}
+
+TEST_P(AllSocketPairTest, SendmsgRecvmsg10KB) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ std::vector<char> sent_data(10 * 1024);
+ RandomizeBuffer(sent_data.data(), sent_data.size());
+ ASSERT_NO_FATAL_FAILURE(
+ SendNullCmsg(sockets->first_fd(), sent_data.data(), sent_data.size()));
+
+ std::vector<char> received_data(sent_data.size());
+ ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(sockets->second_fd(), received_data.data(),
+ received_data.size()));
+
+ EXPECT_EQ(0,
+ memcmp(sent_data.data(), received_data.data(), sent_data.size()));
+}
+
+// This test validates that a sendmsg/recvmsg w/ MSG_CTRUNC is a no-op on
+// input flags.
+TEST_P(AllSocketPairTest, SendmsgRecvmsgMsgCtruncNoop) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ std::vector<char> sent_data(10 * 1024);
+ RandomizeBuffer(sent_data.data(), sent_data.size());
+ ASSERT_NO_FATAL_FAILURE(
+ SendNullCmsg(sockets->first_fd(), sent_data.data(), sent_data.size()));
+
+ std::vector<char> received_data(sent_data.size());
+ struct msghdr msg = {};
+ char control[CMSG_SPACE(sizeof(int)) + CMSG_SPACE(sizeof(struct ucred))];
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+
+ struct iovec iov;
+ iov.iov_base = &received_data[0];
+ iov.iov_len = received_data.size();
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ // MSG_CTRUNC should be a no-op.
+ ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, MSG_CTRUNC),
+ SyscallSucceedsWithValue(received_data.size()));
+ struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+ EXPECT_EQ(cmsg, nullptr);
+ EXPECT_EQ(msg.msg_controllen, 0);
+ EXPECT_EQ(0,
+ memcmp(sent_data.data(), received_data.data(), sent_data.size()));
+}
+
+TEST_P(AllSocketPairTest, SendmsgRecvmsg16KB) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ std::vector<char> sent_data(16 * 1024);
+ RandomizeBuffer(sent_data.data(), sent_data.size());
+ ASSERT_NO_FATAL_FAILURE(
+ SendNullCmsg(sockets->first_fd(), sent_data.data(), sent_data.size()));
+
+ std::vector<char> received_data(sent_data.size());
+ ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(sockets->second_fd(), received_data.data(),
+ received_data.size()));
+
+ EXPECT_EQ(0,
+ memcmp(sent_data.data(), received_data.data(), sent_data.size()));
+}
+
+TEST_P(AllSocketPairTest, RecvmmsgInvalidTimeout) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ char buf[10];
+ struct mmsghdr msg = {};
+ struct iovec iov = {};
+ iov.iov_len = sizeof(buf);
+ iov.iov_base = buf;
+ msg.msg_hdr.msg_iov = &iov;
+ msg.msg_hdr.msg_iovlen = 1;
+ struct timespec timeout = {-1, -1};
+ ASSERT_THAT(RetryEINTR(recvmmsg)(sockets->first_fd(), &msg, 1, 0, &timeout),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_P(AllSocketPairTest, RecvmmsgTimeoutBeforeRecv) {
+ // There is a known bug in the Linux recvmmsg(2) causing it to block forever
+ // if the timeout expires while blocking for the first message.
+ SKIP_IF(!IsRunningOnGvisor());
+
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ char buf[10];
+ struct mmsghdr msg = {};
+ struct iovec iov = {};
+ iov.iov_len = sizeof(buf);
+ iov.iov_base = buf;
+ msg.msg_hdr.msg_iov = &iov;
+ msg.msg_hdr.msg_iovlen = 1;
+ struct timespec timeout = {};
+ ASSERT_THAT(RetryEINTR(recvmmsg)(sockets->first_fd(), &msg, 1, 0, &timeout),
+ SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST_P(AllSocketPairTest, MsgPeek) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ char sent_data[50];
+ memset(&sent_data, 0, sizeof(sent_data));
+ ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data, sizeof(sent_data)),
+ SyscallSucceedsWithValue(sizeof(sent_data)));
+
+ char received_data[sizeof(sent_data)];
+ for (int i = 0; i < 3; i++) {
+ memset(received_data, 0, sizeof(received_data));
+ EXPECT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+ sizeof(received_data), MSG_PEEK),
+ SyscallSucceedsWithValue(sizeof(received_data)));
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(received_data)));
+ }
+
+ ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+ sizeof(received_data), 0),
+ SyscallSucceedsWithValue(sizeof(received_data)));
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(received_data)));
+}
+
+TEST_P(AllSocketPairTest, LingerSocketOption) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ struct linger got_linger = {-1, -1};
+ socklen_t length = sizeof(struct linger);
+ EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_LINGER,
+ &got_linger, &length),
+ SyscallSucceedsWithValue(0));
+ struct linger want_linger = {};
+ EXPECT_EQ(0, memcmp(&want_linger, &got_linger, sizeof(struct linger)));
+ EXPECT_EQ(sizeof(struct linger), length);
+}
+
+TEST_P(AllSocketPairTest, KeepAliveSocketOption) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ int keepalive = -1;
+ socklen_t length = sizeof(int);
+ EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_KEEPALIVE,
+ &keepalive, &length),
+ SyscallSucceedsWithValue(0));
+ EXPECT_EQ(0, keepalive);
+ EXPECT_EQ(sizeof(int), length);
+}
+
+TEST_P(AllSocketPairTest, RcvBufSucceeds) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ int size = 0;
+ socklen_t size_size = sizeof(size);
+ EXPECT_THAT(
+ getsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVBUF, &size, &size_size),
+ SyscallSucceeds());
+ EXPECT_GT(size, 0);
+}
+
+TEST_P(AllSocketPairTest, SndBufSucceeds) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ int size = 0;
+ socklen_t size_size = sizeof(size);
+ EXPECT_THAT(
+ getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDBUF, &size, &size_size),
+ SyscallSucceeds());
+ EXPECT_GT(size, 0);
+}
+
+TEST_P(AllSocketPairTest, RecvTimeoutSucceeds) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ struct timeval tv {
+ .tv_sec = 0, .tv_usec = 10
+ };
+ EXPECT_THAT(
+ setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)),
+ SyscallSucceeds());
+
+ char buf[20] = {};
+ EXPECT_THAT(RetryEINTR(recv)(sockets->first_fd(), buf, sizeof(buf), 0),
+ SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST_P(AllSocketPairTest, RecvTimeoutOneSecondSucceeds) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ struct timeval tv {
+ .tv_sec = 1, .tv_usec = 0
+ };
+ EXPECT_THAT(
+ setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)),
+ SyscallSucceeds());
+
+ char buf[20] = {};
+ EXPECT_THAT(RetryEINTR(recv)(sockets->first_fd(), buf, sizeof(buf), 0),
+ SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST_P(AllSocketPairTest, RecvmsgTimeoutSucceeds) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ struct timeval tv {
+ .tv_sec = 0, .tv_usec = 10
+ };
+ EXPECT_THAT(
+ setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)),
+ SyscallSucceeds());
+
+ struct msghdr msg = {};
+ char buf[20] = {};
+ struct iovec iov;
+ iov.iov_base = buf;
+ iov.iov_len = sizeof(buf);
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ EXPECT_THAT(RetryEINTR(recvmsg)(sockets->first_fd(), &msg, 0),
+ SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST_P(AllSocketPairTest, SoRcvTimeoIsSet) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ struct timeval tv {
+ .tv_sec = 0, .tv_usec = 35
+ };
+ EXPECT_THAT(
+ setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)),
+ SyscallSucceeds());
+}
+
+TEST_P(AllSocketPairTest, SoRcvTimeoIsSetLargerArg) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ struct timeval_with_extra {
+ struct timeval tv;
+ int64_t extra_data;
+ } ABSL_ATTRIBUTE_PACKED;
+
+ timeval_with_extra tv_extra;
+ tv_extra.tv.tv_sec = 0;
+ tv_extra.tv.tv_usec = 25;
+
+ EXPECT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO,
+ &tv_extra, sizeof(tv_extra)),
+ SyscallSucceeds());
+}
+
+TEST_P(AllSocketPairTest, RecvmsgTimeoutOneSecondSucceeds) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ struct timeval tv {
+ .tv_sec = 1, .tv_usec = 0
+ };
+ EXPECT_THAT(
+ setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)),
+ SyscallSucceeds());
+
+ struct msghdr msg = {};
+ char buf[20] = {};
+ struct iovec iov;
+ iov.iov_base = buf;
+ iov.iov_len = sizeof(buf);
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ EXPECT_THAT(RetryEINTR(recvmsg)(sockets->first_fd(), &msg, 0),
+ SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST_P(AllSocketPairTest, RecvWaitAll) {
+ SKIP_IF(IsRunningOnGvisor()); // FIXME: Support MSG_WAITALL.
+
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data[100];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ ASSERT_THAT(write(sockets->first_fd(), sent_data, sizeof(sent_data)),
+ SyscallSucceedsWithValue(sizeof(sent_data)));
+
+ char received_data[sizeof(sent_data)] = {};
+ ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+ sizeof(received_data), MSG_WAITALL),
+ SyscallSucceedsWithValue(sizeof(sent_data)));
+}
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_generic.h b/test/syscalls/linux/socket_generic.h
new file mode 100644
index 000000000..cd826abcf
--- /dev/null
+++ b/test/syscalls/linux/socket_generic.h
@@ -0,0 +1,30 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_GENERIC_H_
+#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_GENERIC_H_
+
+#include "test/syscalls/linux/socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Test fixture for tests that apply to pairs of blocking and non-blocking
+// connected stream sockets.
+using AllSocketPairTest = SocketPairTest;
+
+} // namespace testing
+} // namespace gvisor
+
+#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_GENERIC_H_
diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc
new file mode 100644
index 000000000..7bdbd7797
--- /dev/null
+++ b/test/syscalls/linux/socket_inet_loopback.cc
@@ -0,0 +1,812 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <string.h>
+#include <sys/socket.h>
+
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "absl/strings/str_cat.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/posix_error.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+PosixErrorOr<uint16_t> AddrPort(int family, sockaddr_storage const& addr) {
+ switch (family) {
+ case AF_INET:
+ return static_cast<uint16_t>(
+ reinterpret_cast<sockaddr_in const*>(&addr)->sin_port);
+ case AF_INET6:
+ return static_cast<uint16_t>(
+ reinterpret_cast<sockaddr_in6 const*>(&addr)->sin6_port);
+ default:
+ return PosixError(EINVAL,
+ absl::StrCat("unknown socket family: ", family));
+ }
+}
+
+PosixError SetAddrPort(int family, sockaddr_storage* addr, uint16_t port) {
+ switch (family) {
+ case AF_INET:
+ reinterpret_cast<sockaddr_in*>(addr)->sin_port = port;
+ return NoError();
+ case AF_INET6:
+ reinterpret_cast<sockaddr_in6*>(addr)->sin6_port = port;
+ return NoError();
+ default:
+ return PosixError(EINVAL,
+ absl::StrCat("unknown socket family: ", family));
+ }
+}
+
+struct TestAddress {
+ std::string description;
+ sockaddr_storage addr;
+ socklen_t addr_len;
+
+ int family() const { return addr.ss_family; }
+ explicit TestAddress(std::string description = "")
+ : description(std::move(description)), addr(), addr_len() {}
+};
+
+TestAddress V4Any() {
+ TestAddress t("V4Any");
+ t.addr.ss_family = AF_INET;
+ t.addr_len = sizeof(sockaddr_in);
+ reinterpret_cast<sockaddr_in*>(&t.addr)->sin_addr.s_addr = htonl(INADDR_ANY);
+ return t;
+}
+
+TestAddress V4Loopback() {
+ TestAddress t("V4Loopback");
+ t.addr.ss_family = AF_INET;
+ t.addr_len = sizeof(sockaddr_in);
+ reinterpret_cast<sockaddr_in*>(&t.addr)->sin_addr.s_addr =
+ htonl(INADDR_LOOPBACK);
+ return t;
+}
+
+TestAddress V4MappedAny() {
+ TestAddress t("V4MappedAny");
+ t.addr.ss_family = AF_INET6;
+ t.addr_len = sizeof(sockaddr_in6);
+ inet_pton(AF_INET6, "::ffff:0.0.0.0",
+ reinterpret_cast<sockaddr_in6*>(&t.addr)->sin6_addr.s6_addr);
+ return t;
+}
+
+TestAddress V4MappedLoopback() {
+ TestAddress t("V4MappedLoopback");
+ t.addr.ss_family = AF_INET6;
+ t.addr_len = sizeof(sockaddr_in6);
+ inet_pton(AF_INET6, "::ffff:127.0.0.1",
+ reinterpret_cast<sockaddr_in6*>(&t.addr)->sin6_addr.s6_addr);
+ return t;
+}
+
+TestAddress V6Any() {
+ TestAddress t("V6Any");
+ t.addr.ss_family = AF_INET6;
+ t.addr_len = sizeof(sockaddr_in6);
+ reinterpret_cast<sockaddr_in6*>(&t.addr)->sin6_addr = in6addr_any;
+ return t;
+}
+
+TestAddress V6Loopback() {
+ TestAddress t("V6Loopback");
+ t.addr.ss_family = AF_INET6;
+ t.addr_len = sizeof(sockaddr_in6);
+ reinterpret_cast<sockaddr_in6*>(&t.addr)->sin6_addr = in6addr_loopback;
+ return t;
+}
+
+struct TestParam {
+ TestAddress listener;
+ TestAddress connector;
+};
+
+std::string DescribeTestParam(::testing::TestParamInfo<TestParam> const& info) {
+ return absl::StrCat("Listen", info.param.listener.description, "_Connect",
+ info.param.connector.description);
+}
+
+using SocketInetLoopbackTest = ::testing::TestWithParam<TestParam>;
+
+TEST(BadSocketPairArgs, ValidateErrForBadCallsToSocketPair) {
+ int fd[2] = {};
+
+ // Valid AF but invalid for socketpair(2) return ESOCKTNOSUPPORT.
+ ASSERT_THAT(socketpair(AF_INET, 0, 0, fd),
+ SyscallFailsWithErrno(ESOCKTNOSUPPORT));
+ ASSERT_THAT(socketpair(AF_INET6, 0, 0, fd),
+ SyscallFailsWithErrno(ESOCKTNOSUPPORT));
+
+ // Invalid AF will return ENOAFSUPPORT.
+ ASSERT_THAT(socketpair(AF_MAX, 0, 0, fd),
+ SyscallFailsWithErrno(EAFNOSUPPORT));
+ ASSERT_THAT(socketpair(8675309, 0, 0, fd),
+ SyscallFailsWithErrno(EAFNOSUPPORT));
+}
+
+TEST_P(SocketInetLoopbackTest, TCP) {
+ auto const& param = GetParam();
+
+ TestAddress const& listener = param.listener;
+ TestAddress const& connector = param.connector;
+
+ // Create the listening socket.
+ const FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE(
+ Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP));
+ sockaddr_storage listen_addr = listener.addr;
+ ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr),
+ listener.addr_len),
+ SyscallSucceeds());
+ ASSERT_THAT(listen(listen_fd.get(), SOMAXCONN), SyscallSucceeds());
+
+ // Get the port bound by the listening socket.
+ socklen_t addrlen = listener.addr_len;
+ ASSERT_THAT(getsockname(listen_fd.get(),
+ reinterpret_cast<sockaddr*>(&listen_addr), &addrlen),
+ SyscallSucceeds());
+ uint16_t const port =
+ ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr));
+
+ // Connect to the listening socket.
+ const FileDescriptor conn_fd = ASSERT_NO_ERRNO_AND_VALUE(
+ Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP));
+ sockaddr_storage conn_addr = connector.addr;
+ ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port));
+ ASSERT_THAT(RetryEINTR(connect)(conn_fd.get(),
+ reinterpret_cast<sockaddr*>(&conn_addr),
+ connector.addr_len),
+ SyscallSucceeds());
+
+ // Accept the connection.
+ ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr));
+
+ ASSERT_THAT(shutdown(listen_fd.get(), SHUT_RDWR), SyscallSucceeds());
+
+ ASSERT_THAT(shutdown(conn_fd.get(), SHUT_RDWR), SyscallSucceeds());
+}
+
+INSTANTIATE_TEST_CASE_P(
+ All, SocketInetLoopbackTest,
+ ::testing::Values(
+ // Listeners bound to IPv4 addresses refuse connections using IPv6
+ // addresses.
+ TestParam{V4Any(), V4Any()}, TestParam{V4Any(), V4Loopback()},
+ TestParam{V4Any(), V4MappedAny()},
+ TestParam{V4Any(), V4MappedLoopback()},
+ TestParam{V4Loopback(), V4Any()}, TestParam{V4Loopback(), V4Loopback()},
+ TestParam{V4Loopback(), V4MappedLoopback()},
+ TestParam{V4MappedAny(), V4Any()},
+ TestParam{V4MappedAny(), V4Loopback()},
+ TestParam{V4MappedAny(), V4MappedAny()},
+ TestParam{V4MappedAny(), V4MappedLoopback()},
+ TestParam{V4MappedLoopback(), V4Any()},
+ TestParam{V4MappedLoopback(), V4Loopback()},
+ TestParam{V4MappedLoopback(), V4MappedLoopback()},
+
+ // Listeners bound to IN6ADDR_ANY accept all connections.
+ TestParam{V6Any(), V4Any()}, TestParam{V6Any(), V4Loopback()},
+ TestParam{V6Any(), V4MappedAny()},
+ TestParam{V6Any(), V4MappedLoopback()}, TestParam{V6Any(), V6Any()},
+ TestParam{V6Any(), V6Loopback()},
+
+ // Listeners bound to IN6ADDR_LOOPBACK refuse connections using IPv4
+ // addresses.
+ TestParam{V6Loopback(), V6Any()},
+ TestParam{V6Loopback(), V6Loopback()}),
+ DescribeTestParam);
+
+struct ProtocolTestParam {
+ std::string description;
+ int type;
+};
+
+std::string DescribeProtocolTestParam(
+ ::testing::TestParamInfo<ProtocolTestParam> const& info) {
+ return info.param.description;
+}
+
+using SocketMultiProtocolInetLoopbackTest =
+ ::testing::TestWithParam<ProtocolTestParam>;
+
+TEST_P(SocketMultiProtocolInetLoopbackTest, V4MappedLoopbackOnlyReservesV4) {
+ auto const& param = GetParam();
+
+ for (int i = 0; true; i++) {
+ // Bind the v4 loopback on a dual stack socket.
+ TestAddress const& test_addr_dual = V4MappedLoopback();
+ sockaddr_storage addr_dual = test_addr_dual.addr;
+ const FileDescriptor fd_dual = ASSERT_NO_ERRNO_AND_VALUE(
+ Socket(test_addr_dual.family(), param.type, 0));
+ ASSERT_THAT(bind(fd_dual.get(), reinterpret_cast<sockaddr*>(&addr_dual),
+ test_addr_dual.addr_len),
+ SyscallSucceeds());
+
+ // Get the port that we bound.
+ socklen_t addrlen = test_addr_dual.addr_len;
+ ASSERT_THAT(getsockname(fd_dual.get(),
+ reinterpret_cast<sockaddr*>(&addr_dual), &addrlen),
+ SyscallSucceeds());
+ uint16_t const port =
+ ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr_dual.family(), addr_dual));
+
+ // Verify that we can still bind the v6 loopback on the same port.
+ TestAddress const& test_addr_v6 = V6Loopback();
+ sockaddr_storage addr_v6 = test_addr_v6.addr;
+ ASSERT_NO_ERRNO(SetAddrPort(test_addr_v6.family(), &addr_v6, port));
+ const FileDescriptor fd_v6 =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_v6.family(), param.type, 0));
+ int ret = bind(fd_v6.get(), reinterpret_cast<sockaddr*>(&addr_v6),
+ test_addr_v6.addr_len);
+ if (ret == -1 && errno == EADDRINUSE) {
+ // Port may have been in use.
+ ASSERT_LT(i, 100); // Give up after 100 tries.
+ continue;
+ }
+ ASSERT_THAT(ret, SyscallSucceeds());
+
+ // Verify that binding the v4 loopback with the same port on a v4 socket
+ // fails.
+ TestAddress const& test_addr_v4 = V4Loopback();
+ sockaddr_storage addr_v4 = test_addr_v4.addr;
+ ASSERT_NO_ERRNO(SetAddrPort(test_addr_v4.family(), &addr_v4, port));
+ const FileDescriptor fd_v4 =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_v4.family(), param.type, 0));
+ ASSERT_THAT(bind(fd_v4.get(), reinterpret_cast<sockaddr*>(&addr_v4),
+ test_addr_v4.addr_len),
+ SyscallFailsWithErrno(EADDRINUSE));
+
+ // No need to try again.
+ break;
+ }
+}
+
+TEST_P(SocketMultiProtocolInetLoopbackTest, V4MappedAnyOnlyReservesV4) {
+ auto const& param = GetParam();
+
+ for (int i = 0; true; i++) {
+ // Bind the v4 any on a dual stack socket.
+ TestAddress const& test_addr_dual = V4MappedAny();
+ sockaddr_storage addr_dual = test_addr_dual.addr;
+ const FileDescriptor fd_dual = ASSERT_NO_ERRNO_AND_VALUE(
+ Socket(test_addr_dual.family(), param.type, 0));
+ ASSERT_THAT(bind(fd_dual.get(), reinterpret_cast<sockaddr*>(&addr_dual),
+ test_addr_dual.addr_len),
+ SyscallSucceeds());
+
+ // Get the port that we bound.
+ socklen_t addrlen = test_addr_dual.addr_len;
+ ASSERT_THAT(getsockname(fd_dual.get(),
+ reinterpret_cast<sockaddr*>(&addr_dual), &addrlen),
+ SyscallSucceeds());
+ uint16_t const port =
+ ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr_dual.family(), addr_dual));
+
+ // Verify that we can still bind the v6 loopback on the same port.
+ TestAddress const& test_addr_v6 = V6Loopback();
+ sockaddr_storage addr_v6 = test_addr_v6.addr;
+ ASSERT_NO_ERRNO(SetAddrPort(test_addr_v6.family(), &addr_v6, port));
+ const FileDescriptor fd_v6 =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_v6.family(), param.type, 0));
+ int ret = bind(fd_v6.get(), reinterpret_cast<sockaddr*>(&addr_v6),
+ test_addr_v6.addr_len);
+ if (ret == -1 && errno == EADDRINUSE) {
+ // Port may have been in use.
+ ASSERT_LT(i, 100); // Give up after 100 tries.
+ continue;
+ }
+ ASSERT_THAT(ret, SyscallSucceeds());
+
+ // Verify that binding the v4 loopback with the same port on a v4 socket
+ // fails.
+ TestAddress const& test_addr_v4 = V4Loopback();
+ sockaddr_storage addr_v4 = test_addr_v4.addr;
+ ASSERT_NO_ERRNO(SetAddrPort(test_addr_v4.family(), &addr_v4, port));
+ const FileDescriptor fd_v4 =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_v4.family(), param.type, 0));
+ ASSERT_THAT(bind(fd_v4.get(), reinterpret_cast<sockaddr*>(&addr_v4),
+ test_addr_v4.addr_len),
+ SyscallFailsWithErrno(EADDRINUSE));
+
+ // No need to try again.
+ break;
+ }
+}
+
+TEST_P(SocketMultiProtocolInetLoopbackTest, DualStackV6AnyReservesEverything) {
+ auto const& param = GetParam();
+
+ // Bind the v6 any on a dual stack socket.
+ TestAddress const& test_addr_dual = V6Any();
+ sockaddr_storage addr_dual = test_addr_dual.addr;
+ const FileDescriptor fd_dual =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_dual.family(), param.type, 0));
+ ASSERT_THAT(bind(fd_dual.get(), reinterpret_cast<sockaddr*>(&addr_dual),
+ test_addr_dual.addr_len),
+ SyscallSucceeds());
+
+ // Get the port that we bound.
+ socklen_t addrlen = test_addr_dual.addr_len;
+ ASSERT_THAT(getsockname(fd_dual.get(),
+ reinterpret_cast<sockaddr*>(&addr_dual), &addrlen),
+ SyscallSucceeds());
+ uint16_t const port =
+ ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr_dual.family(), addr_dual));
+
+ // Verify that binding the v6 loopback with the same port fails.
+ TestAddress const& test_addr_v6 = V6Loopback();
+ sockaddr_storage addr_v6 = test_addr_v6.addr;
+ ASSERT_NO_ERRNO(SetAddrPort(test_addr_v6.family(), &addr_v6, port));
+ const FileDescriptor fd_v6 =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_v6.family(), param.type, 0));
+ ASSERT_THAT(bind(fd_v6.get(), reinterpret_cast<sockaddr*>(&addr_v6),
+ test_addr_v6.addr_len),
+ SyscallFailsWithErrno(EADDRINUSE));
+
+ // Verify that binding the v4 loopback on the same port with a v6 socket
+ // fails.
+ TestAddress const& test_addr_v4_mapped = V4MappedLoopback();
+ sockaddr_storage addr_v4_mapped = test_addr_v4_mapped.addr;
+ ASSERT_NO_ERRNO(
+ SetAddrPort(test_addr_v4_mapped.family(), &addr_v4_mapped, port));
+ const FileDescriptor fd_v4_mapped = ASSERT_NO_ERRNO_AND_VALUE(
+ Socket(test_addr_v4_mapped.family(), param.type, 0));
+ ASSERT_THAT(
+ bind(fd_v4_mapped.get(), reinterpret_cast<sockaddr*>(&addr_v4_mapped),
+ test_addr_v4_mapped.addr_len),
+ SyscallFailsWithErrno(EADDRINUSE));
+
+ // Verify that binding the v4 loopback on the same port with a v4 socket
+ // fails.
+ TestAddress const& test_addr_v4 = V4Loopback();
+ sockaddr_storage addr_v4 = test_addr_v4.addr;
+ ASSERT_NO_ERRNO(SetAddrPort(test_addr_v4.family(), &addr_v4, port));
+ const FileDescriptor fd_v4 =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_v4.family(), param.type, 0));
+ ASSERT_THAT(bind(fd_v4.get(), reinterpret_cast<sockaddr*>(&addr_v4),
+ test_addr_v4.addr_len),
+ SyscallFailsWithErrno(EADDRINUSE));
+}
+
+TEST_P(SocketMultiProtocolInetLoopbackTest, V6OnlyV6AnyReservesV6) {
+ auto const& param = GetParam();
+
+ for (int i = 0; true; i++) {
+ // Bind the v6 any on a v6-only socket.
+ TestAddress const& test_addr_dual = V6Any();
+ sockaddr_storage addr_dual = test_addr_dual.addr;
+ const FileDescriptor fd_dual = ASSERT_NO_ERRNO_AND_VALUE(
+ Socket(test_addr_dual.family(), param.type, 0));
+ int one = 1;
+ EXPECT_THAT(
+ setsockopt(fd_dual.get(), IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)),
+ SyscallSucceeds());
+ ASSERT_THAT(bind(fd_dual.get(), reinterpret_cast<sockaddr*>(&addr_dual),
+ test_addr_dual.addr_len),
+ SyscallSucceeds());
+
+ // Get the port that we bound.
+ socklen_t addrlen = test_addr_dual.addr_len;
+ ASSERT_THAT(getsockname(fd_dual.get(),
+ reinterpret_cast<sockaddr*>(&addr_dual), &addrlen),
+ SyscallSucceeds());
+ uint16_t const port =
+ ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr_dual.family(), addr_dual));
+
+ // Verify that binding the v6 loopback with the same port fails.
+ TestAddress const& test_addr_v6 = V6Loopback();
+ sockaddr_storage addr_v6 = test_addr_v6.addr;
+ ASSERT_NO_ERRNO(SetAddrPort(test_addr_v6.family(), &addr_v6, port));
+ const FileDescriptor fd_v6 =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_v6.family(), param.type, 0));
+ ASSERT_THAT(bind(fd_v6.get(), reinterpret_cast<sockaddr*>(&addr_v6),
+ test_addr_v6.addr_len),
+ SyscallFailsWithErrno(EADDRINUSE));
+
+ // Verify that we can still bind the v4 loopback on the same port.
+ TestAddress const& test_addr_v4_mapped = V4MappedLoopback();
+ sockaddr_storage addr_v4_mapped = test_addr_v4_mapped.addr;
+ ASSERT_NO_ERRNO(
+ SetAddrPort(test_addr_v4_mapped.family(), &addr_v4_mapped, port));
+ const FileDescriptor fd_v4_mapped = ASSERT_NO_ERRNO_AND_VALUE(
+ Socket(test_addr_v4_mapped.family(), param.type, 0));
+ int ret =
+ bind(fd_v4_mapped.get(), reinterpret_cast<sockaddr*>(&addr_v4_mapped),
+ test_addr_v4_mapped.addr_len);
+ if (ret == -1 && errno == EADDRINUSE) {
+ // Port may have been in use.
+ ASSERT_LT(i, 100); // Give up after 100 tries.
+ continue;
+ }
+ ASSERT_THAT(ret, SyscallSucceeds());
+
+ // No need to try again.
+ break;
+ }
+}
+
+TEST_P(SocketMultiProtocolInetLoopbackTest, V6EphemeralPortReserved) {
+ auto const& param = GetParam();
+
+ // FIXME
+ SKIP_IF(IsRunningOnGvisor() && param.type == SOCK_STREAM);
+
+ for (int i = 0; true; i++) {
+ // Bind the v6 loopback on a dual stack socket.
+ TestAddress const& test_addr = V6Loopback();
+ sockaddr_storage bound_addr = test_addr.addr;
+ const FileDescriptor bound_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0));
+ ASSERT_THAT(bind(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr),
+ test_addr.addr_len),
+ SyscallSucceeds());
+
+ // Listen iff TCP.
+ if (param.type == SOCK_STREAM) {
+ ASSERT_THAT(listen(bound_fd.get(), SOMAXCONN), SyscallSucceeds());
+ }
+
+ // Get the port that we bound.
+ socklen_t bound_addr_len = test_addr.addr_len;
+ ASSERT_THAT(
+ getsockname(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr),
+ &bound_addr_len),
+ SyscallSucceeds());
+
+ // Connect to bind an ephemeral port.
+ const FileDescriptor connected_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0));
+ ASSERT_THAT(
+ connect(connected_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr),
+ bound_addr_len),
+ SyscallSucceeds());
+
+ // Get the ephemeral port.
+ sockaddr_storage connected_addr = {};
+ socklen_t connected_addr_len = sizeof(connected_addr);
+ ASSERT_THAT(getsockname(connected_fd.get(),
+ reinterpret_cast<sockaddr*>(&connected_addr),
+ &connected_addr_len),
+ SyscallSucceeds());
+ uint16_t const ephemeral_port =
+ ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr.family(), connected_addr));
+
+ // Verify that we actually got an ephemeral port.
+ ASSERT_NE(ephemeral_port, 0);
+
+ // Verify that the ephemeral port is reserved.
+ const FileDescriptor checking_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0));
+ EXPECT_THAT(
+ bind(checking_fd.get(), reinterpret_cast<sockaddr*>(&connected_addr),
+ connected_addr_len),
+ SyscallFailsWithErrno(EADDRINUSE));
+
+ // Verify that binding the v6 loopback with the same port fails.
+ TestAddress const& test_addr_v6 = V6Loopback();
+ sockaddr_storage addr_v6 = test_addr_v6.addr;
+ ASSERT_NO_ERRNO(
+ SetAddrPort(test_addr_v6.family(), &addr_v6, ephemeral_port));
+ const FileDescriptor fd_v6 =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_v6.family(), param.type, 0));
+ ASSERT_THAT(bind(fd_v6.get(), reinterpret_cast<sockaddr*>(&addr_v6),
+ test_addr_v6.addr_len),
+ SyscallFailsWithErrno(EADDRINUSE));
+
+ // Verify that binding the v4 any with the same port fails.
+ TestAddress const& test_addr_v4_any = V4Any();
+ sockaddr_storage addr_v4_any = test_addr_v4_any.addr;
+ ASSERT_NO_ERRNO(
+ SetAddrPort(test_addr_v4_any.family(), &addr_v4_any, ephemeral_port));
+ const FileDescriptor fd_v4_any = ASSERT_NO_ERRNO_AND_VALUE(
+ Socket(test_addr_v4_any.family(), param.type, 0));
+ ASSERT_THAT(bind(fd_v4_any.get(), reinterpret_cast<sockaddr*>(&addr_v4_any),
+ test_addr_v4_any.addr_len),
+ SyscallFailsWithErrno(EADDRINUSE));
+
+ // Verify that we can still bind the v4 loopback on the same port.
+ TestAddress const& test_addr_v4_mapped = V4MappedLoopback();
+ sockaddr_storage addr_v4_mapped = test_addr_v4_mapped.addr;
+ ASSERT_NO_ERRNO(SetAddrPort(test_addr_v4_mapped.family(), &addr_v4_mapped,
+ ephemeral_port));
+ const FileDescriptor fd_v4_mapped = ASSERT_NO_ERRNO_AND_VALUE(
+ Socket(test_addr_v4_mapped.family(), param.type, 0));
+ int ret =
+ bind(fd_v4_mapped.get(), reinterpret_cast<sockaddr*>(&addr_v4_mapped),
+ test_addr_v4_mapped.addr_len);
+ if (ret == -1 && errno == EADDRINUSE) {
+ // Port may have been in use.
+ ASSERT_LT(i, 100); // Give up after 100 tries.
+ continue;
+ }
+ EXPECT_THAT(ret, SyscallSucceeds());
+
+ // No need to try again.
+ break;
+ }
+}
+
+TEST_P(SocketMultiProtocolInetLoopbackTest, V4MappedEphemeralPortReserved) {
+ auto const& param = GetParam();
+
+ // FIXME
+ SKIP_IF(IsRunningOnGvisor() && param.type == SOCK_STREAM);
+
+ for (int i = 0; true; i++) {
+ // Bind the v4 loopback on a dual stack socket.
+ TestAddress const& test_addr = V4MappedLoopback();
+ sockaddr_storage bound_addr = test_addr.addr;
+ const FileDescriptor bound_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0));
+ ASSERT_THAT(bind(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr),
+ test_addr.addr_len),
+ SyscallSucceeds());
+
+ // Listen iff TCP.
+ if (param.type == SOCK_STREAM) {
+ ASSERT_THAT(listen(bound_fd.get(), SOMAXCONN), SyscallSucceeds());
+ }
+
+ // Get the port that we bound.
+ socklen_t bound_addr_len = test_addr.addr_len;
+ ASSERT_THAT(
+ getsockname(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr),
+ &bound_addr_len),
+ SyscallSucceeds());
+
+ // Connect to bind an ephemeral port.
+ const FileDescriptor connected_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0));
+ ASSERT_THAT(
+ connect(connected_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr),
+ bound_addr_len),
+ SyscallSucceeds());
+
+ // Get the ephemeral port.
+ sockaddr_storage connected_addr = {};
+ socklen_t connected_addr_len = sizeof(connected_addr);
+ ASSERT_THAT(getsockname(connected_fd.get(),
+ reinterpret_cast<sockaddr*>(&connected_addr),
+ &connected_addr_len),
+ SyscallSucceeds());
+ uint16_t const ephemeral_port =
+ ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr.family(), connected_addr));
+
+ // Verify that we actually got an ephemeral port.
+ ASSERT_NE(ephemeral_port, 0);
+
+ // Verify that the ephemeral port is reserved.
+ const FileDescriptor checking_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0));
+ EXPECT_THAT(
+ bind(checking_fd.get(), reinterpret_cast<sockaddr*>(&connected_addr),
+ connected_addr_len),
+ SyscallFailsWithErrno(EADDRINUSE));
+
+ // Verify that binding the v4 loopback on the same port with a v4 socket
+ // fails.
+ TestAddress const& test_addr_v4 = V4Loopback();
+ sockaddr_storage addr_v4 = test_addr_v4.addr;
+ ASSERT_NO_ERRNO(
+ SetAddrPort(test_addr_v4.family(), &addr_v4, ephemeral_port));
+ const FileDescriptor fd_v4 =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_v4.family(), param.type, 0));
+ EXPECT_THAT(bind(fd_v4.get(), reinterpret_cast<sockaddr*>(&addr_v4),
+ test_addr_v4.addr_len),
+ SyscallFailsWithErrno(EADDRINUSE));
+
+ // Verify that binding the v6 any on the same port with a dual-stack socket
+ // fails.
+ TestAddress const& test_addr_v6_any = V6Any();
+ sockaddr_storage addr_v6_any = test_addr_v6_any.addr;
+ ASSERT_NO_ERRNO(
+ SetAddrPort(test_addr_v6_any.family(), &addr_v6_any, ephemeral_port));
+ const FileDescriptor fd_v6_any = ASSERT_NO_ERRNO_AND_VALUE(
+ Socket(test_addr_v6_any.family(), param.type, 0));
+ ASSERT_THAT(bind(fd_v6_any.get(), reinterpret_cast<sockaddr*>(&addr_v6_any),
+ test_addr_v6_any.addr_len),
+ SyscallFailsWithErrno(EADDRINUSE));
+
+ // For some reason, binding the TCP v6-only any is flaky on Linux. Maybe we
+ // tend to run out of ephemeral ports? Regardless, binding the v6 loopback
+ // seems pretty reliable. Only try to bind the v6-only any on UDP and
+ // gVisor.
+
+ int ret = -1;
+
+ if (!IsRunningOnGvisor() && param.type == SOCK_STREAM) {
+ // Verify that we can still bind the v6 loopback on the same port.
+ TestAddress const& test_addr_v6 = V6Loopback();
+ sockaddr_storage addr_v6 = test_addr_v6.addr;
+ ASSERT_NO_ERRNO(
+ SetAddrPort(test_addr_v6.family(), &addr_v6, ephemeral_port));
+ const FileDescriptor fd_v6 = ASSERT_NO_ERRNO_AND_VALUE(
+ Socket(test_addr_v6.family(), param.type, 0));
+ ret = bind(fd_v6.get(), reinterpret_cast<sockaddr*>(&addr_v6),
+ test_addr_v6.addr_len);
+ } else {
+ // Verify that we can still bind the v6 any on the same port with a
+ // v6-only socket.
+ const FileDescriptor fd_v6_only_any = ASSERT_NO_ERRNO_AND_VALUE(
+ Socket(test_addr_v6_any.family(), param.type, 0));
+ int one = 1;
+ EXPECT_THAT(setsockopt(fd_v6_only_any.get(), IPPROTO_IPV6, IPV6_V6ONLY,
+ &one, sizeof(one)),
+ SyscallSucceeds());
+ ret =
+ bind(fd_v6_only_any.get(), reinterpret_cast<sockaddr*>(&addr_v6_any),
+ test_addr_v6_any.addr_len);
+ }
+
+ if (ret == -1 && errno == EADDRINUSE) {
+ // Port may have been in use.
+ ASSERT_LT(i, 100); // Give up after 100 tries.
+ continue;
+ }
+ EXPECT_THAT(ret, SyscallSucceeds());
+
+ // No need to try again.
+ break;
+ }
+}
+
+TEST_P(SocketMultiProtocolInetLoopbackTest, V4EphemeralPortReserved) {
+ auto const& param = GetParam();
+
+ // FIXME
+ SKIP_IF(IsRunningOnGvisor() && param.type == SOCK_STREAM);
+
+ for (int i = 0; true; i++) {
+ // Bind the v4 loopback on a v4 socket.
+ TestAddress const& test_addr = V4Loopback();
+ sockaddr_storage bound_addr = test_addr.addr;
+ const FileDescriptor bound_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0));
+ ASSERT_THAT(bind(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr),
+ test_addr.addr_len),
+ SyscallSucceeds());
+
+ // Listen iff TCP.
+ if (param.type == SOCK_STREAM) {
+ ASSERT_THAT(listen(bound_fd.get(), SOMAXCONN), SyscallSucceeds());
+ }
+
+ // Get the port that we bound.
+ socklen_t bound_addr_len = test_addr.addr_len;
+ ASSERT_THAT(
+ getsockname(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr),
+ &bound_addr_len),
+ SyscallSucceeds());
+
+ // Connect to bind an ephemeral port.
+ const FileDescriptor connected_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0));
+ ASSERT_THAT(
+ connect(connected_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr),
+ bound_addr_len),
+ SyscallSucceeds());
+
+ // Get the ephemeral port.
+ sockaddr_storage connected_addr = {};
+ socklen_t connected_addr_len = sizeof(connected_addr);
+ ASSERT_THAT(getsockname(connected_fd.get(),
+ reinterpret_cast<sockaddr*>(&connected_addr),
+ &connected_addr_len),
+ SyscallSucceeds());
+ uint16_t const ephemeral_port =
+ ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr.family(), connected_addr));
+
+ // Verify that we actually got an ephemeral port.
+ ASSERT_NE(ephemeral_port, 0);
+
+ // Verify that the ephemeral port is reserved.
+ const FileDescriptor checking_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0));
+ EXPECT_THAT(
+ bind(checking_fd.get(), reinterpret_cast<sockaddr*>(&connected_addr),
+ connected_addr_len),
+ SyscallFailsWithErrno(EADDRINUSE));
+
+ // Verify that binding the v4 loopback on the same port with a v6 socket
+ // fails.
+ TestAddress const& test_addr_v4_mapped = V4MappedLoopback();
+ sockaddr_storage addr_v4_mapped = test_addr_v4_mapped.addr;
+ ASSERT_NO_ERRNO(SetAddrPort(test_addr_v4_mapped.family(), &addr_v4_mapped,
+ ephemeral_port));
+ const FileDescriptor fd_v4_mapped = ASSERT_NO_ERRNO_AND_VALUE(
+ Socket(test_addr_v4_mapped.family(), param.type, 0));
+ EXPECT_THAT(
+ bind(fd_v4_mapped.get(), reinterpret_cast<sockaddr*>(&addr_v4_mapped),
+ test_addr_v4_mapped.addr_len),
+ SyscallFailsWithErrno(EADDRINUSE));
+
+ // Verify that binding the v6 any on the same port with a dual-stack socket
+ // fails.
+ TestAddress const& test_addr_v6_any = V6Any();
+ sockaddr_storage addr_v6_any = test_addr_v6_any.addr;
+ ASSERT_NO_ERRNO(
+ SetAddrPort(test_addr_v6_any.family(), &addr_v6_any, ephemeral_port));
+ const FileDescriptor fd_v6_any = ASSERT_NO_ERRNO_AND_VALUE(
+ Socket(test_addr_v6_any.family(), param.type, 0));
+ ASSERT_THAT(bind(fd_v6_any.get(), reinterpret_cast<sockaddr*>(&addr_v6_any),
+ test_addr_v6_any.addr_len),
+ SyscallFailsWithErrno(EADDRINUSE));
+
+ // For some reason, binding the TCP v6-only any is flaky on Linux. Maybe we
+ // tend to run out of ephemeral ports? Regardless, binding the v6 loopback
+ // seems pretty reliable. Only try to bind the v6-only any on UDP and
+ // gVisor.
+
+ int ret = -1;
+
+ if (!IsRunningOnGvisor() && param.type == SOCK_STREAM) {
+ // Verify that we can still bind the v6 loopback on the same port.
+ TestAddress const& test_addr_v6 = V6Loopback();
+ sockaddr_storage addr_v6 = test_addr_v6.addr;
+ ASSERT_NO_ERRNO(
+ SetAddrPort(test_addr_v6.family(), &addr_v6, ephemeral_port));
+ const FileDescriptor fd_v6 = ASSERT_NO_ERRNO_AND_VALUE(
+ Socket(test_addr_v6.family(), param.type, 0));
+ ret = bind(fd_v6.get(), reinterpret_cast<sockaddr*>(&addr_v6),
+ test_addr_v6.addr_len);
+ } else {
+ // Verify that we can still bind the v6 any on the same port with a
+ // v6-only socket.
+ const FileDescriptor fd_v6_only_any = ASSERT_NO_ERRNO_AND_VALUE(
+ Socket(test_addr_v6_any.family(), param.type, 0));
+ int one = 1;
+ EXPECT_THAT(setsockopt(fd_v6_only_any.get(), IPPROTO_IPV6, IPV6_V6ONLY,
+ &one, sizeof(one)),
+ SyscallSucceeds());
+ ret =
+ bind(fd_v6_only_any.get(), reinterpret_cast<sockaddr*>(&addr_v6_any),
+ test_addr_v6_any.addr_len);
+ }
+
+ if (ret == -1 && errno == EADDRINUSE) {
+ // Port may have been in use.
+ ASSERT_LT(i, 100); // Give up after 100 tries.
+ continue;
+ }
+ EXPECT_THAT(ret, SyscallSucceeds());
+
+ // No need to try again.
+ break;
+ }
+}
+
+INSTANTIATE_TEST_CASE_P(AllFamlies, SocketMultiProtocolInetLoopbackTest,
+ ::testing::Values(ProtocolTestParam{"TCP", SOCK_STREAM},
+ ProtocolTestParam{"UDP", SOCK_DGRAM}),
+ DescribeProtocolTestParam);
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_ip_tcp_generic.cc b/test/syscalls/linux/socket_ip_tcp_generic.cc
new file mode 100644
index 000000000..bb5a83c9a
--- /dev/null
+++ b/test/syscalls/linux/socket_ip_tcp_generic.cc
@@ -0,0 +1,392 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_ip_tcp_generic.h"
+
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <sys/poll.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+TEST_P(TCPSocketPairTest, TcpInfoSucceedes) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ struct tcp_info opt = {};
+ socklen_t optLen = sizeof(opt);
+ EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_TCP, TCP_INFO, &opt, &optLen),
+ SyscallSucceeds());
+}
+
+TEST_P(TCPSocketPairTest, ShortTcpInfoSucceedes) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ struct tcp_info opt = {};
+ socklen_t optLen = 1;
+ EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_TCP, TCP_INFO, &opt, &optLen),
+ SyscallSucceeds());
+}
+
+TEST_P(TCPSocketPairTest, ZeroTcpInfoSucceedes) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ struct tcp_info opt = {};
+ socklen_t optLen = 0;
+ EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_TCP, TCP_INFO, &opt, &optLen),
+ SyscallSucceeds());
+}
+
+// This test validates that an RST is sent instead of a FIN when data is
+// unread on calls to close(2).
+TEST_P(TCPSocketPairTest, RSTSentOnCloseWithUnreadData) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char buf[10] = {};
+ ASSERT_THAT(RetryEINTR(write)(sockets->first_fd(), buf, sizeof(buf)),
+ SyscallSucceedsWithValue(sizeof(buf)));
+
+ // Wait until t_ sees the data on its side but don't read it.
+ struct pollfd poll_fd = {sockets->second_fd(), POLLIN | POLLHUP, 0};
+ constexpr int kPollTimeoutMs = 20000; // Wait up to 20 seconds for the data.
+ ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, kPollTimeoutMs),
+ SyscallSucceedsWithValue(1));
+
+ // Now close the connected without reading the data.
+ ASSERT_THAT(close(sockets->release_second_fd()), SyscallSucceeds());
+
+ // Wait for the other end to receive the RST (up to 20 seconds).
+ struct pollfd poll_fd2 = {sockets->first_fd(), POLLIN | POLLHUP, 0};
+ ASSERT_THAT(RetryEINTR(poll)(&poll_fd2, 1, kPollTimeoutMs),
+ SyscallSucceedsWithValue(1));
+
+ // A shutdown with unread data will cause a RST to be sent instead
+ // of a FIN, per RFC 2525 section 2.17; this is also what Linux does.
+ ASSERT_THAT(RetryEINTR(read)(sockets->first_fd(), buf, sizeof(buf)),
+ SyscallFailsWithErrno(ECONNRESET));
+}
+
+// This test will validate that a RST will cause POLLHUP to trigger.
+TEST_P(TCPSocketPairTest, RSTCausesPollHUP) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char buf[10] = {};
+ ASSERT_THAT(RetryEINTR(write)(sockets->first_fd(), buf, sizeof(buf)),
+ SyscallSucceedsWithValue(sizeof(buf)));
+
+ // Wait until second sees the data on its side but don't read it.
+ struct pollfd poll_fd = {sockets->second_fd(), POLLIN, 0};
+ constexpr int kPollTimeoutMs = 20000; // Wait up to 20 seconds for the data.
+ ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, kPollTimeoutMs),
+ SyscallSucceedsWithValue(1));
+ EXPECT_EQ(poll_fd.revents & POLLIN, POLLIN);
+
+ // Confirm we at least have one unread byte.
+ int bytes_available = 0;
+ ASSERT_THAT(
+ RetryEINTR(ioctl)(sockets->second_fd(), FIONREAD, &bytes_available),
+ SyscallSucceeds());
+ EXPECT_GT(bytes_available, 0);
+
+ // Now close the connected socket without reading the data from the second,
+ // this will cause a RST and we should see that with POLLHUP.
+ ASSERT_THAT(close(sockets->release_second_fd()), SyscallSucceeds());
+
+ // Wait for the other end to receive the RST (up to 20 seconds).
+ struct pollfd poll_fd3 = {sockets->first_fd(), POLLHUP, 0};
+ ASSERT_THAT(RetryEINTR(poll)(&poll_fd3, 1, kPollTimeoutMs),
+ SyscallSucceedsWithValue(1));
+ ASSERT_NE(poll_fd.revents & (POLLHUP | POLLIN), 0);
+}
+
+// This test validates that even if a RST is sent the other end will not
+// get an ECONNRESET until it's read all data.
+TEST_P(TCPSocketPairTest, RSTSentOnCloseWithUnreadDataAllowsReadBuffered) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char buf[10] = {};
+ ASSERT_THAT(RetryEINTR(write)(sockets->first_fd(), buf, sizeof(buf)),
+ SyscallSucceedsWithValue(sizeof(buf)));
+ ASSERT_THAT(RetryEINTR(write)(sockets->second_fd(), buf, sizeof(buf)),
+ SyscallSucceedsWithValue(sizeof(buf)));
+
+ // Wait until second sees the data on its side but don't read it.
+ struct pollfd poll_fd = {sockets->second_fd(), POLLIN, 0};
+ constexpr int kPollTimeoutMs = 30000; // Wait up to 30 seconds for the data.
+ ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, kPollTimeoutMs),
+ SyscallSucceedsWithValue(1));
+
+ // Wait until first sees the data on its side but don't read it.
+ struct pollfd poll_fd2 = {sockets->first_fd(), POLLIN, 0};
+ ASSERT_THAT(RetryEINTR(poll)(&poll_fd2, 1, kPollTimeoutMs),
+ SyscallSucceedsWithValue(1));
+
+ // Now close the connected socket without reading the data from the second.
+ ASSERT_THAT(close(sockets->release_second_fd()), SyscallSucceeds());
+
+ // Wait for the other end to receive the RST (up to 30 seconds).
+ struct pollfd poll_fd3 = {sockets->first_fd(), POLLHUP, 0};
+ ASSERT_THAT(RetryEINTR(poll)(&poll_fd3, 1, kPollTimeoutMs),
+ SyscallSucceedsWithValue(1));
+
+ // Since we also have data buffered we should be able to read it before
+ // the syscall will fail with ECONNRESET.
+ ASSERT_THAT(RetryEINTR(read)(sockets->first_fd(), buf, sizeof(buf)),
+ SyscallSucceedsWithValue(sizeof(buf)));
+
+ // A shutdown with unread data will cause a RST to be sent instead
+ // of a FIN, per RFC 2525 section 2.17; this is also what Linux does.
+ ASSERT_THAT(RetryEINTR(read)(sockets->first_fd(), buf, sizeof(buf)),
+ SyscallFailsWithErrno(ECONNRESET));
+}
+
+// This test will verify that a clean shutdown (FIN) is preformed when there
+// is unread data but only the write side is closed.
+TEST_P(TCPSocketPairTest, FINSentOnShutdownWrWithUnreadData) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char buf[10] = {};
+ ASSERT_THAT(RetryEINTR(write)(sockets->first_fd(), buf, sizeof(buf)),
+ SyscallSucceedsWithValue(sizeof(buf)));
+
+ // Wait until t_ sees the data on its side but don't read it.
+ struct pollfd poll_fd = {sockets->second_fd(), POLLIN | POLLHUP, 0};
+ constexpr int kPollTimeoutMs = 20000; // Wait up to 20 seconds for the data.
+ ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, kPollTimeoutMs),
+ SyscallSucceedsWithValue(1));
+
+ // Now shutdown the write end leaving the read end open.
+ ASSERT_THAT(shutdown(sockets->second_fd(), SHUT_WR), SyscallSucceeds());
+
+ // Wait for the other end to receive the FIN (up to 20 seconds).
+ struct pollfd poll_fd2 = {sockets->first_fd(), POLLIN | POLLHUP, 0};
+ ASSERT_THAT(RetryEINTR(poll)(&poll_fd2, 1, kPollTimeoutMs),
+ SyscallSucceedsWithValue(1));
+
+ // Since we didn't shutdown the read end this will be a clean close.
+ ASSERT_THAT(RetryEINTR(read)(sockets->first_fd(), buf, sizeof(buf)),
+ SyscallSucceedsWithValue(0));
+}
+
+// This test will verify that when data is received by a socket, even if it's
+// not read SHUT_RD will not cause any packets to be generated and data will
+// remain in the buffer and can be read later.
+TEST_P(TCPSocketPairTest, ShutdownRdShouldCauseNoPacketsWithUnreadData) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char buf[10] = {};
+ ASSERT_THAT(RetryEINTR(write)(sockets->first_fd(), buf, sizeof(buf)),
+ SyscallSucceedsWithValue(sizeof(buf)));
+
+ // Wait until t_ sees the data on its side but don't read it.
+ struct pollfd poll_fd = {sockets->second_fd(), POLLIN | POLLHUP, 0};
+ constexpr int kPollTimeoutMs = 20000; // Wait up to 20 seconds for the data.
+ ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, kPollTimeoutMs),
+ SyscallSucceedsWithValue(1));
+
+ // Now shutdown the read end, this will generate no packets to the other end.
+ ASSERT_THAT(shutdown(sockets->second_fd(), SHUT_RD), SyscallSucceeds());
+
+ // We should not receive any events on the other side of the socket.
+ struct pollfd poll_fd2 = {sockets->first_fd(), POLLIN | POLLHUP, 0};
+ constexpr int kPollNoResponseTimeoutMs = 3000;
+ ASSERT_THAT(RetryEINTR(poll)(&poll_fd2, 1, kPollNoResponseTimeoutMs),
+ SyscallSucceedsWithValue(0)); // Timeout.
+
+ // Even though we did a SHUT_RD on the read end we can still read the data.
+ ASSERT_THAT(RetryEINTR(read)(sockets->second_fd(), buf, sizeof(buf)),
+ SyscallSucceedsWithValue(sizeof(buf)));
+}
+
+TEST_P(TCPSocketPairTest, ClosedReadNonBlockingSocket) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ // Set the read end to O_NONBLOCK.
+ int opts = 0;
+ ASSERT_THAT(opts = fcntl(sockets->second_fd(), F_GETFL), SyscallSucceeds());
+ ASSERT_THAT(fcntl(sockets->second_fd(), F_SETFL, opts | O_NONBLOCK),
+ SyscallSucceeds());
+
+ char buf[10] = {};
+ ASSERT_THAT(RetryEINTR(send)(sockets->first_fd(), buf, sizeof(buf), 0),
+ SyscallSucceedsWithValue(sizeof(buf)));
+
+ // Wait until second_fd sees the data and then recv it.
+ struct pollfd poll_fd = {sockets->second_fd(), POLLIN, 0};
+ constexpr int kPollTimeoutMs = 2000; // Wait up to 2 seconds for the data.
+ ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, kPollTimeoutMs),
+ SyscallSucceedsWithValue(1));
+
+ ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), buf, sizeof(buf), 0),
+ SyscallSucceedsWithValue(sizeof(buf)));
+
+ // Now shutdown the write end leaving the read end open.
+ ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds());
+
+ // Wait for close notification and recv again.
+ struct pollfd poll_fd2 = {sockets->second_fd(), POLLIN, 0};
+ ASSERT_THAT(RetryEINTR(poll)(&poll_fd2, 1, kPollTimeoutMs),
+ SyscallSucceedsWithValue(1));
+
+ ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), buf, sizeof(buf), 0),
+ SyscallSucceedsWithValue(0));
+}
+
+TEST_P(TCPSocketPairTest,
+ ShutdownRdUnreadDataShouldCauseNoPacketsUnlessClosed) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char buf[10] = {};
+ ASSERT_THAT(RetryEINTR(write)(sockets->first_fd(), buf, sizeof(buf)),
+ SyscallSucceedsWithValue(sizeof(buf)));
+
+ // Wait until t_ sees the data on its side but don't read it.
+ struct pollfd poll_fd = {sockets->second_fd(), POLLIN | POLLHUP, 0};
+ constexpr int kPollTimeoutMs = 20000; // Wait up to 20 seconds for the data.
+ ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, kPollTimeoutMs),
+ SyscallSucceedsWithValue(1));
+
+ // Now shutdown the read end, this will generate no packets to the other end.
+ ASSERT_THAT(shutdown(sockets->second_fd(), SHUT_RD), SyscallSucceeds());
+
+ // We should not receive any events on the other side of the socket.
+ struct pollfd poll_fd2 = {sockets->first_fd(), POLLIN | POLLHUP, 0};
+ constexpr int kPollNoResponseTimeoutMs = 3000;
+ ASSERT_THAT(RetryEINTR(poll)(&poll_fd2, 1, kPollNoResponseTimeoutMs),
+ SyscallSucceedsWithValue(0)); // Timeout.
+
+ // Now since we've fully closed the connection it will generate a RST.
+ ASSERT_THAT(close(sockets->release_second_fd()), SyscallSucceeds());
+ ASSERT_THAT(RetryEINTR(poll)(&poll_fd2, 1, kPollTimeoutMs),
+ SyscallSucceedsWithValue(1)); // The other end has closed.
+
+ // A shutdown with unread data will cause a RST to be sent instead
+ // of a FIN, per RFC 2525 section 2.17; this is also what Linux does.
+ ASSERT_THAT(RetryEINTR(read)(sockets->first_fd(), buf, sizeof(buf)),
+ SyscallFailsWithErrno(ECONNRESET));
+}
+
+TEST_P(TCPSocketPairTest, TCPCorkDefault) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ int get = -1;
+ socklen_t get_len = sizeof(get);
+ EXPECT_THAT(
+ getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CORK, &get, &get_len),
+ SyscallSucceedsWithValue(0));
+ EXPECT_EQ(get_len, sizeof(get));
+ EXPECT_EQ(get, 0);
+}
+
+TEST_P(TCPSocketPairTest, SetTCPCork) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CORK,
+ &kSockOptOn, sizeof(kSockOptOn)),
+ SyscallSucceeds());
+
+ int get = -1;
+ socklen_t get_len = sizeof(get);
+ EXPECT_THAT(
+ getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CORK, &get, &get_len),
+ SyscallSucceedsWithValue(0));
+ EXPECT_EQ(get_len, sizeof(get));
+ EXPECT_EQ(get, kSockOptOn);
+
+ ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CORK,
+ &kSockOptOff, sizeof(kSockOptOff)),
+ SyscallSucceeds());
+
+ EXPECT_THAT(
+ getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CORK, &get, &get_len),
+ SyscallSucceedsWithValue(0));
+ EXPECT_EQ(get_len, sizeof(get));
+ EXPECT_EQ(get, kSockOptOff);
+}
+
+TEST_P(TCPSocketPairTest, TCPCork) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CORK,
+ &kSockOptOn, sizeof(kSockOptOn)),
+ SyscallSucceeds());
+
+ constexpr char kData[] = "abc";
+ ASSERT_THAT(WriteFd(sockets->first_fd(), kData, sizeof(kData)),
+ SyscallSucceedsWithValue(sizeof(kData)));
+
+ ASSERT_NO_FATAL_FAILURE(RecvNoData(sockets->second_fd()));
+
+ EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CORK,
+ &kSockOptOff, sizeof(kSockOptOff)),
+ SyscallSucceeds());
+
+ // Create a receive buffer larger than kData.
+ char buf[(sizeof(kData) + 1) * 2] = {};
+ ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), buf, sizeof(buf), 0),
+ SyscallSucceedsWithValue(sizeof(kData)));
+ EXPECT_EQ(absl::string_view(kData, sizeof(kData)),
+ absl::string_view(buf, sizeof(kData)));
+}
+
+TEST_P(TCPSocketPairTest, TCPQuickAckDefault) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ int get = -1;
+ socklen_t get_len = sizeof(get);
+ EXPECT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_QUICKACK, &get,
+ &get_len),
+ SyscallSucceedsWithValue(0));
+ EXPECT_EQ(get_len, sizeof(get));
+ EXPECT_EQ(get, kSockOptOn);
+}
+
+TEST_P(TCPSocketPairTest, SetTCPQuickAck) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_QUICKACK,
+ &kSockOptOff, sizeof(kSockOptOff)),
+ SyscallSucceeds());
+
+ int get = -1;
+ socklen_t get_len = sizeof(get);
+ EXPECT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_QUICKACK, &get,
+ &get_len),
+ SyscallSucceedsWithValue(0));
+ EXPECT_EQ(get_len, sizeof(get));
+ EXPECT_EQ(get, kSockOptOff);
+
+ ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_QUICKACK,
+ &kSockOptOn, sizeof(kSockOptOn)),
+ SyscallSucceeds());
+
+ EXPECT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_QUICKACK, &get,
+ &get_len),
+ SyscallSucceedsWithValue(0));
+ EXPECT_EQ(get_len, sizeof(get));
+ EXPECT_EQ(get, kSockOptOn);
+}
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_ip_tcp_generic.h b/test/syscalls/linux/socket_ip_tcp_generic.h
new file mode 100644
index 000000000..f38500d14
--- /dev/null
+++ b/test/syscalls/linux/socket_ip_tcp_generic.h
@@ -0,0 +1,29 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IP_TCP_GENERIC_H_
+#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IP_TCP_GENERIC_H_
+
+#include "test/syscalls/linux/socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Test fixture for tests that apply to pairs of connected TCP sockets.
+using TCPSocketPairTest = SocketPairTest;
+
+} // namespace testing
+} // namespace gvisor
+
+#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IP_TCP_GENERIC_H_
diff --git a/test/syscalls/linux/socket_ip_tcp_generic_loopback.cc b/test/syscalls/linux/socket_ip_tcp_generic_loopback.cc
new file mode 100644
index 000000000..9e10dea30
--- /dev/null
+++ b/test/syscalls/linux/socket_ip_tcp_generic_loopback.cc
@@ -0,0 +1,47 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <netinet/tcp.h>
+#include <vector>
+
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_ip_tcp_generic.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+ return ApplyVecToVec<SocketPairKind>(
+ std::vector<Middleware>{
+ NoOp, SetSockOpt(IPPROTO_TCP, TCP_NODELAY, &kSockOptOn)},
+ VecCat<SocketPairKind>(
+ ApplyVec<SocketPairKind>(
+ IPv6TCPAcceptBindSocketPair,
+ AllBitwiseCombinations(List<int>{0, SOCK_CLOEXEC})),
+ ApplyVec<SocketPairKind>(
+ IPv4TCPAcceptBindSocketPair,
+ AllBitwiseCombinations(List<int>{0, SOCK_CLOEXEC})),
+ ApplyVec<SocketPairKind>(
+ DualStackTCPAcceptBindSocketPair,
+ AllBitwiseCombinations(List<int>{0, SOCK_CLOEXEC}))));
+}
+
+INSTANTIATE_TEST_CASE_P(
+ AllUnixDomainSockets, TCPSocketPairTest,
+ ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_ip_tcp_loopback.cc b/test/syscalls/linux/socket_ip_tcp_loopback.cc
new file mode 100644
index 000000000..f95061506
--- /dev/null
+++ b/test/syscalls/linux/socket_ip_tcp_loopback.cc
@@ -0,0 +1,43 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_generic.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+ return VecCat<SocketPairKind>(
+ ApplyVec<SocketPairKind>(
+ IPv6TCPAcceptBindSocketPair,
+ AllBitwiseCombinations(List<int>{0, SOCK_CLOEXEC})),
+ ApplyVec<SocketPairKind>(
+ IPv4TCPAcceptBindSocketPair,
+ AllBitwiseCombinations(List<int>{0, SOCK_CLOEXEC})),
+ ApplyVec<SocketPairKind>(
+ DualStackTCPAcceptBindSocketPair,
+ AllBitwiseCombinations(List<int>{0, SOCK_CLOEXEC})));
+}
+
+INSTANTIATE_TEST_CASE_P(
+ AllUnixDomainSockets, AllSocketPairTest,
+ ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_ip_tcp_loopback_blocking.cc b/test/syscalls/linux/socket_ip_tcp_loopback_blocking.cc
new file mode 100644
index 000000000..bb419e3a8
--- /dev/null
+++ b/test/syscalls/linux/socket_ip_tcp_loopback_blocking.cc
@@ -0,0 +1,44 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <netinet/tcp.h>
+#include <vector>
+
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_stream_blocking.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+ return ApplyVecToVec<SocketPairKind>(
+ std::vector<Middleware>{
+ NoOp, SetSockOpt(IPPROTO_TCP, TCP_NODELAY, &kSockOptOn)},
+ VecCat<SocketPairKind>(
+ ApplyVec<SocketPairKind>(
+ IPv6TCPAcceptBindSocketPair,
+ AllBitwiseCombinations(List<int>{0, SOCK_CLOEXEC})),
+ ApplyVec<SocketPairKind>(
+ IPv4TCPAcceptBindSocketPair,
+ AllBitwiseCombinations(List<int>{0, SOCK_CLOEXEC}))));
+}
+
+INSTANTIATE_TEST_CASE_P(
+ AllUnixDomainSockets, BlockingStreamSocketPairTest,
+ ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_ip_tcp_loopback_nonblock.cc b/test/syscalls/linux/socket_ip_tcp_loopback_nonblock.cc
new file mode 100644
index 000000000..af6fd635e
--- /dev/null
+++ b/test/syscalls/linux/socket_ip_tcp_loopback_nonblock.cc
@@ -0,0 +1,46 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <netinet/tcp.h>
+#include <vector>
+
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_non_blocking.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+ return ApplyVecToVec<SocketPairKind>(
+ std::vector<Middleware>{
+ NoOp, SetSockOpt(IPPROTO_TCP, TCP_NODELAY, &kSockOptOn)},
+ VecCat<SocketPairKind>(
+ ApplyVec<SocketPairKind>(
+ IPv6TCPAcceptBindSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC})),
+ ApplyVec<SocketPairKind>(
+ IPv4TCPAcceptBindSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC}))));
+}
+
+INSTANTIATE_TEST_CASE_P(
+ AllUnixDomainSockets, NonBlockingSocketPairTest,
+ ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_ip_tcp_udp_generic.cc b/test/syscalls/linux/socket_ip_tcp_udp_generic.cc
new file mode 100644
index 000000000..91d029985
--- /dev/null
+++ b/test/syscalls/linux/socket_ip_tcp_udp_generic.cc
@@ -0,0 +1,78 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <sys/poll.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Test fixture for tests that apply to pairs of TCP and UDP sockets.
+using TcpUdpSocketPairTest = SocketPairTest;
+
+TEST_P(TcpUdpSocketPairTest, ShutdownWrFollowedBySendIsError) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ // Now shutdown the write end of the first.
+ ASSERT_THAT(shutdown(sockets->first_fd(), SHUT_WR), SyscallSucceeds());
+
+ char buf[10] = {};
+ ASSERT_THAT(RetryEINTR(send)(sockets->first_fd(), buf, sizeof(buf), 0),
+ SyscallFailsWithErrno(EPIPE));
+}
+
+std::vector<SocketPairKind> GetSocketPairs() {
+ return VecCat<SocketPairKind>(
+ ApplyVec<SocketPairKind>(
+ IPv6UDPBidirectionalBindSocketPair,
+ AllBitwiseCombinations(List<int>{0, SOCK_NONBLOCK})),
+ ApplyVec<SocketPairKind>(
+ IPv4UDPBidirectionalBindSocketPair,
+ AllBitwiseCombinations(List<int>{0, SOCK_NONBLOCK})),
+ ApplyVec<SocketPairKind>(
+ DualStackUDPBidirectionalBindSocketPair,
+ AllBitwiseCombinations(List<int>{0, SOCK_NONBLOCK})),
+ ApplyVec<SocketPairKind>(
+ IPv6TCPAcceptBindSocketPair,
+ AllBitwiseCombinations(List<int>{0, SOCK_NONBLOCK})),
+ ApplyVec<SocketPairKind>(
+ IPv4TCPAcceptBindSocketPair,
+ AllBitwiseCombinations(List<int>{0, SOCK_NONBLOCK})),
+ ApplyVec<SocketPairKind>(
+ DualStackTCPAcceptBindSocketPair,
+ AllBitwiseCombinations(List<int>{0, SOCK_NONBLOCK})));
+}
+
+INSTANTIATE_TEST_CASE_P(
+ AllTCPSockets, TcpUdpSocketPairTest,
+ ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_ip_udp_loopback.cc b/test/syscalls/linux/socket_ip_udp_loopback.cc
new file mode 100644
index 000000000..8a98fa8df
--- /dev/null
+++ b/test/syscalls/linux/socket_ip_udp_loopback.cc
@@ -0,0 +1,48 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_generic.h"
+#include "test/syscalls/linux/socket_non_stream.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+ return VecCat<SocketPairKind>(
+ ApplyVec<SocketPairKind>(
+ IPv6UDPBidirectionalBindSocketPair,
+ AllBitwiseCombinations(List<int>{0, SOCK_CLOEXEC})),
+ ApplyVec<SocketPairKind>(
+ IPv4UDPBidirectionalBindSocketPair,
+ AllBitwiseCombinations(List<int>{0, SOCK_CLOEXEC})),
+ ApplyVec<SocketPairKind>(
+ DualStackUDPBidirectionalBindSocketPair,
+ AllBitwiseCombinations(List<int>{0, SOCK_CLOEXEC})));
+}
+
+INSTANTIATE_TEST_CASE_P(
+ AllUnixDomainSockets, AllSocketPairTest,
+ ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+INSTANTIATE_TEST_CASE_P(
+ AllUnixDomainSockets, NonStreamSocketPairTest,
+ ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_ip_udp_loopback_blocking.cc b/test/syscalls/linux/socket_ip_udp_loopback_blocking.cc
new file mode 100644
index 000000000..08ff3e656
--- /dev/null
+++ b/test/syscalls/linux/socket_ip_udp_loopback_blocking.cc
@@ -0,0 +1,40 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_non_stream_blocking.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+ return VecCat<SocketPairKind>(
+ ApplyVec<SocketPairKind>(
+ IPv6UDPBidirectionalBindSocketPair,
+ AllBitwiseCombinations(List<int>{0, SOCK_CLOEXEC})),
+ ApplyVec<SocketPairKind>(
+ IPv4UDPBidirectionalBindSocketPair,
+ AllBitwiseCombinations(List<int>{0, SOCK_CLOEXEC})));
+}
+
+INSTANTIATE_TEST_CASE_P(
+ AllUnixDomainSockets, BlockingNonStreamSocketPairTest,
+ ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_ip_udp_loopback_nonblock.cc b/test/syscalls/linux/socket_ip_udp_loopback_nonblock.cc
new file mode 100644
index 000000000..256bcfccf
--- /dev/null
+++ b/test/syscalls/linux/socket_ip_udp_loopback_nonblock.cc
@@ -0,0 +1,42 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_non_blocking.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+ return VecCat<SocketPairKind>(
+ ApplyVec<SocketPairKind>(
+ IPv6UDPBidirectionalBindSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC})),
+ ApplyVec<SocketPairKind>(
+ IPv4UDPBidirectionalBindSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC})));
+}
+
+INSTANTIATE_TEST_CASE_P(
+ AllUnixDomainSockets, NonBlockingSocketPairTest,
+ ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_netdevice.cc b/test/syscalls/linux/socket_netdevice.cc
new file mode 100644
index 000000000..7bfb62a6f
--- /dev/null
+++ b/test/syscalls/linux/socket_netdevice.cc
@@ -0,0 +1,182 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/sockios.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+
+#include "gtest/gtest.h"
+#include "absl/base/internal/endian.h"
+#include "test/syscalls/linux/socket_netlink_util.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/test_util.h"
+
+// Tests for netdevice queries.
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+using ::testing::AnyOf;
+using ::testing::Eq;
+
+TEST(NetdeviceTest, Loopback) {
+ FileDescriptor sock =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0));
+
+ // Prepare the request.
+ struct ifreq ifr;
+ snprintf(ifr.ifr_name, IFNAMSIZ, "lo");
+
+ // Check for a non-zero interface index.
+ ASSERT_THAT(ioctl(sock.get(), SIOCGIFINDEX, &ifr), SyscallSucceeds());
+ EXPECT_NE(ifr.ifr_ifindex, 0);
+
+ // Check that the loopback is zero hardware address.
+ ASSERT_THAT(ioctl(sock.get(), SIOCGIFHWADDR, &ifr), SyscallSucceeds());
+ EXPECT_EQ(ifr.ifr_hwaddr.sa_data[0], 0);
+ EXPECT_EQ(ifr.ifr_hwaddr.sa_data[1], 0);
+ EXPECT_EQ(ifr.ifr_hwaddr.sa_data[2], 0);
+ EXPECT_EQ(ifr.ifr_hwaddr.sa_data[3], 0);
+ EXPECT_EQ(ifr.ifr_hwaddr.sa_data[4], 0);
+ EXPECT_EQ(ifr.ifr_hwaddr.sa_data[5], 0);
+}
+
+TEST(NetdeviceTest, Netmask) {
+ // We need an interface index to identify the loopback device.
+ FileDescriptor sock =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0));
+ struct ifreq ifr;
+ snprintf(ifr.ifr_name, IFNAMSIZ, "lo");
+ ASSERT_THAT(ioctl(sock.get(), SIOCGIFINDEX, &ifr), SyscallSucceeds());
+ EXPECT_NE(ifr.ifr_ifindex, 0);
+
+ // Use a netlink socket to get the netmask, which we'll then compare to the
+ // netmask obtained via ioctl.
+ FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket());
+ uint32_t port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get()));
+
+ struct request {
+ struct nlmsghdr hdr;
+ struct rtgenmsg rgm;
+ };
+
+ constexpr uint32_t kSeq = 12345;
+
+ struct request req;
+ req.hdr.nlmsg_len = sizeof(req);
+ req.hdr.nlmsg_type = RTM_GETADDR;
+ req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+ req.hdr.nlmsg_seq = kSeq;
+ req.rgm.rtgen_family = AF_UNSPEC;
+
+ // Iterate through messages until we find the one containing the prefix length
+ // (i.e. netmask) for the loopback device.
+ int prefixlen = -1;
+ ASSERT_NO_ERRNO(NetlinkRequestResponse(
+ fd, &req, sizeof(req), [&](const struct nlmsghdr *hdr) {
+ EXPECT_THAT(hdr->nlmsg_type, AnyOf(Eq(RTM_NEWADDR), Eq(NLMSG_DONE)));
+
+ EXPECT_TRUE((hdr->nlmsg_flags & NLM_F_MULTI) == NLM_F_MULTI)
+ << std::hex << hdr->nlmsg_flags;
+
+ EXPECT_EQ(hdr->nlmsg_seq, kSeq);
+ EXPECT_EQ(hdr->nlmsg_pid, port);
+
+ if (hdr->nlmsg_type != RTM_NEWADDR) {
+ return;
+ }
+
+ // RTM_NEWADDR contains at least the header and ifaddrmsg.
+ EXPECT_GE(hdr->nlmsg_len, sizeof(*hdr) + sizeof(struct ifaddrmsg));
+
+ struct ifaddrmsg *ifaddrmsg =
+ reinterpret_cast<struct ifaddrmsg *>(NLMSG_DATA(hdr));
+ if (ifaddrmsg->ifa_index == static_cast<uint32_t>(ifr.ifr_ifindex) &&
+ ifaddrmsg->ifa_family == AF_INET) {
+ prefixlen = ifaddrmsg->ifa_prefixlen;
+ }
+ }));
+
+ ASSERT_GE(prefixlen, 0);
+
+ // Netmask is stored big endian in struct sockaddr_in, so we do the same for
+ // comparison.
+ uint32_t mask = 0xffffffff << (32 - prefixlen);
+ mask = absl::gbswap_32(mask);
+
+ // Check that the loopback interface has the correct subnet mask.
+ snprintf(ifr.ifr_name, IFNAMSIZ, "lo");
+ ASSERT_THAT(ioctl(sock.get(), SIOCGIFNETMASK, &ifr), SyscallSucceeds());
+ EXPECT_EQ(ifr.ifr_netmask.sa_family, AF_INET);
+ struct sockaddr_in *sin =
+ reinterpret_cast<struct sockaddr_in *>(&ifr.ifr_netmask);
+ EXPECT_EQ(sin->sin_addr.s_addr, mask);
+}
+
+TEST(NetdeviceTest, InterfaceName) {
+ FileDescriptor sock =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0));
+
+ // Prepare the request.
+ struct ifreq ifr;
+ snprintf(ifr.ifr_name, IFNAMSIZ, "lo");
+
+ // Check for a non-zero interface index.
+ ASSERT_THAT(ioctl(sock.get(), SIOCGIFINDEX, &ifr), SyscallSucceeds());
+ EXPECT_NE(ifr.ifr_ifindex, 0);
+
+ // Check that SIOCGIFNAME finds the loopback interface.
+ snprintf(ifr.ifr_name, IFNAMSIZ, "foo");
+ ASSERT_THAT(ioctl(sock.get(), SIOCGIFNAME, &ifr), SyscallSucceeds());
+ EXPECT_STREQ(ifr.ifr_name, "lo");
+}
+
+TEST(NetdeviceTest, InterfaceFlags) {
+ FileDescriptor sock =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0));
+
+ // Prepare the request.
+ struct ifreq ifr;
+ snprintf(ifr.ifr_name, IFNAMSIZ, "lo");
+
+ // Check that SIOCGIFFLAGS marks the interface with IFF_LOOPBACK, IFF_UP, and
+ // IFF_RUNNING.
+ ASSERT_THAT(ioctl(sock.get(), SIOCGIFFLAGS, &ifr), SyscallSucceeds());
+ EXPECT_EQ(ifr.ifr_flags & IFF_UP, IFF_UP);
+ EXPECT_EQ(ifr.ifr_flags & IFF_RUNNING, IFF_RUNNING);
+}
+
+TEST(NetdeviceTest, InterfaceMTU) {
+ FileDescriptor sock =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0));
+
+ // Prepare the request.
+ struct ifreq ifr = {};
+ snprintf(ifr.ifr_name, IFNAMSIZ, "lo");
+
+ // Check that SIOCGIFMTU returns a nonzero MTU.
+ ASSERT_THAT(ioctl(sock.get(), SIOCGIFMTU, &ifr), SyscallSucceeds());
+ EXPECT_GT(ifr.ifr_mtu, 0);
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_netlink_route.cc b/test/syscalls/linux/socket_netlink_route.cc
new file mode 100644
index 000000000..9fc695460
--- /dev/null
+++ b/test/syscalls/linux/socket_netlink_route.cc
@@ -0,0 +1,314 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <ifaddrs.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_netlink_util.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/cleanup.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/test_util.h"
+
+// Tests for NETLINK_ROUTE sockets.
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+using ::testing::AnyOf;
+using ::testing::Eq;
+
+// Netlink sockets must be SOCK_DGRAM or SOCK_RAW.
+TEST(NetlinkRouteTest, Types) {
+ EXPECT_THAT(socket(AF_NETLINK, SOCK_STREAM, NETLINK_ROUTE),
+ SyscallFailsWithErrno(ESOCKTNOSUPPORT));
+ EXPECT_THAT(socket(AF_NETLINK, SOCK_SEQPACKET, NETLINK_ROUTE),
+ SyscallFailsWithErrno(ESOCKTNOSUPPORT));
+ EXPECT_THAT(socket(AF_NETLINK, SOCK_RDM, NETLINK_ROUTE),
+ SyscallFailsWithErrno(ESOCKTNOSUPPORT));
+ EXPECT_THAT(socket(AF_NETLINK, SOCK_DCCP, NETLINK_ROUTE),
+ SyscallFailsWithErrno(ESOCKTNOSUPPORT));
+ EXPECT_THAT(socket(AF_NETLINK, SOCK_PACKET, NETLINK_ROUTE),
+ SyscallFailsWithErrno(ESOCKTNOSUPPORT));
+
+ int fd;
+ EXPECT_THAT(fd = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE),
+ SyscallSucceeds());
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+
+ EXPECT_THAT(fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE),
+ SyscallSucceeds());
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+}
+
+TEST(NetlinkRouteTest, AutomaticPort) {
+ FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE));
+
+ struct sockaddr_nl addr = {};
+ addr.nl_family = AF_NETLINK;
+
+ EXPECT_THAT(
+ bind(fd.get(), reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr)),
+ SyscallSucceeds());
+
+ socklen_t addrlen = sizeof(addr);
+ EXPECT_THAT(getsockname(fd.get(), reinterpret_cast<struct sockaddr*>(&addr),
+ &addrlen),
+ SyscallSucceeds());
+ EXPECT_EQ(addrlen, sizeof(addr));
+ // This is the only netlink socket in the process, so it should get the PID as
+ // the port id.
+ //
+ // N.B. Another process could theoretically have explicitly reserved our pid
+ // as a port ID, but that is very unlikely.
+ EXPECT_EQ(addr.nl_pid, getpid());
+}
+
+// Calling connect automatically binds to an automatic port.
+TEST(NetlinkRouteTest, ConnectBinds) {
+ FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE));
+
+ struct sockaddr_nl addr = {};
+ addr.nl_family = AF_NETLINK;
+
+ EXPECT_THAT(connect(fd.get(), reinterpret_cast<struct sockaddr*>(&addr),
+ sizeof(addr)),
+ SyscallSucceeds());
+
+ socklen_t addrlen = sizeof(addr);
+ EXPECT_THAT(getsockname(fd.get(), reinterpret_cast<struct sockaddr*>(&addr),
+ &addrlen),
+ SyscallSucceeds());
+ EXPECT_EQ(addrlen, sizeof(addr));
+ // This is the only netlink socket in the process, so it should get the PID as
+ // the port id.
+ //
+ // N.B. Another process could theoretically have explicitly reserved our pid
+ // as a port ID, but that is very unlikely.
+ EXPECT_EQ(addr.nl_pid, getpid());
+
+ memset(&addr, 0, sizeof(addr));
+ addr.nl_family = AF_NETLINK;
+
+ // Connecting again is allowed, but keeps the same port.
+ EXPECT_THAT(connect(fd.get(), reinterpret_cast<struct sockaddr*>(&addr),
+ sizeof(addr)),
+ SyscallSucceeds());
+
+ addrlen = sizeof(addr);
+ EXPECT_THAT(getsockname(fd.get(), reinterpret_cast<struct sockaddr*>(&addr),
+ &addrlen),
+ SyscallSucceeds());
+ EXPECT_EQ(addrlen, sizeof(addr));
+ EXPECT_EQ(addr.nl_pid, getpid());
+}
+
+TEST(NetlinkRouteTest, GetPeerName) {
+ FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE));
+
+ struct sockaddr_nl addr = {};
+ socklen_t addrlen = sizeof(addr);
+
+ EXPECT_THAT(getpeername(fd.get(), reinterpret_cast<struct sockaddr*>(&addr),
+ &addrlen),
+ SyscallSucceeds());
+
+ EXPECT_EQ(addrlen, sizeof(addr));
+ EXPECT_EQ(addr.nl_family, AF_NETLINK);
+ // Peer is the kernel if we didn't connect elsewhere.
+ EXPECT_EQ(addr.nl_pid, 0);
+}
+
+using IntSockOptTest = ::testing::TestWithParam<int>;
+
+TEST_P(IntSockOptTest, GetSockOpt) {
+ FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE));
+
+ int res;
+ socklen_t len = sizeof(res);
+
+ EXPECT_THAT(getsockopt(fd.get(), SOL_SOCKET, GetParam(), &res, &len),
+ SyscallSucceeds());
+
+ EXPECT_EQ(len, sizeof(res));
+ EXPECT_GT(res, 0);
+}
+
+INSTANTIATE_TEST_CASE_P(NetlinkRouteTest, IntSockOptTest,
+ ::testing::Values(SO_SNDBUF, SO_RCVBUF));
+
+// Validates the reponses to RTM_GETLINK + NLM_F_DUMP.
+void CheckGetLinkResponse(const struct nlmsghdr* hdr, int seq, int port) {
+ EXPECT_THAT(hdr->nlmsg_type, AnyOf(Eq(RTM_NEWLINK), Eq(NLMSG_DONE)));
+
+ EXPECT_TRUE((hdr->nlmsg_flags & NLM_F_MULTI) == NLM_F_MULTI)
+ << std::hex << hdr->nlmsg_flags;
+
+ EXPECT_EQ(hdr->nlmsg_seq, seq);
+ EXPECT_EQ(hdr->nlmsg_pid, port);
+
+ if (hdr->nlmsg_type != RTM_NEWLINK) {
+ return;
+ }
+
+ // RTM_NEWLINK contains at least the header and ifinfomsg.
+ EXPECT_GE(hdr->nlmsg_len, NLMSG_SPACE(sizeof(struct ifinfomsg)));
+
+ // TODO: Check ifinfomsg contents and following attrs.
+}
+
+TEST(NetlinkRouteTest, GetLinkDump) {
+ FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket());
+ uint32_t port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get()));
+
+ struct request {
+ struct nlmsghdr hdr;
+ struct ifinfomsg ifm;
+ };
+
+ constexpr uint32_t kSeq = 12345;
+
+ struct request req = {};
+ req.hdr.nlmsg_len = sizeof(req);
+ req.hdr.nlmsg_type = RTM_GETLINK;
+ req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+ req.hdr.nlmsg_seq = kSeq;
+ req.ifm.ifi_family = AF_UNSPEC;
+
+ // Loopback is common among all tests, check that it's found.
+ bool loopbackFound = false;
+ ASSERT_NO_ERRNO(NetlinkRequestResponse(
+ fd, &req, sizeof(req), [&](const struct nlmsghdr* hdr) {
+ CheckGetLinkResponse(hdr, kSeq, port);
+ if (hdr->nlmsg_type != RTM_NEWLINK) {
+ return;
+ }
+ ASSERT_GE(hdr->nlmsg_len, NLMSG_SPACE(sizeof(struct ifinfomsg)));
+ const struct ifinfomsg* msg =
+ reinterpret_cast<const struct ifinfomsg*>(NLMSG_DATA(hdr));
+ LOG(INFO) << "Found interface idx=" << msg->ifi_index
+ << ", type=" << std::hex << msg->ifi_type;
+ if (msg->ifi_type == ARPHRD_LOOPBACK) {
+ loopbackFound = true;
+ EXPECT_NE(msg->ifi_flags & IFF_LOOPBACK, 0);
+ }
+ }));
+ EXPECT_TRUE(loopbackFound);
+}
+
+TEST(NetlinkRouteTest, ControlMessageIgnored) {
+ FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket());
+ uint32_t port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get()));
+
+ struct request {
+ struct nlmsghdr control_hdr;
+ struct nlmsghdr message_hdr;
+ struct ifinfomsg ifm;
+ };
+
+ constexpr uint32_t kSeq = 12345;
+
+ struct request req = {};
+
+ // This control message is ignored. We still receive a response for the
+ // following RTM_GETLINK.
+ req.control_hdr.nlmsg_len = sizeof(req.control_hdr);
+ req.control_hdr.nlmsg_type = NLMSG_DONE;
+ req.control_hdr.nlmsg_seq = kSeq;
+
+ req.message_hdr.nlmsg_len = sizeof(req.message_hdr) + sizeof(req.ifm);
+ req.message_hdr.nlmsg_type = RTM_GETLINK;
+ req.message_hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+ req.message_hdr.nlmsg_seq = kSeq;
+
+ req.ifm.ifi_family = AF_UNSPEC;
+
+ ASSERT_NO_ERRNO(NetlinkRequestResponse(
+ fd, &req, sizeof(req), [&](const struct nlmsghdr* hdr) {
+ CheckGetLinkResponse(hdr, kSeq, port);
+ }));
+}
+
+TEST(NetlinkRouteTest, GetAddrDump) {
+ FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket());
+ uint32_t port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get()));
+
+ struct request {
+ struct nlmsghdr hdr;
+ struct rtgenmsg rgm;
+ };
+
+ constexpr uint32_t kSeq = 12345;
+
+ struct request req;
+ req.hdr.nlmsg_len = sizeof(req);
+ req.hdr.nlmsg_type = RTM_GETADDR;
+ req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+ req.hdr.nlmsg_seq = kSeq;
+ req.rgm.rtgen_family = AF_UNSPEC;
+
+ ASSERT_NO_ERRNO(NetlinkRequestResponse(
+ fd, &req, sizeof(req), [&](const struct nlmsghdr* hdr) {
+ EXPECT_THAT(hdr->nlmsg_type, AnyOf(Eq(RTM_NEWADDR), Eq(NLMSG_DONE)));
+
+ EXPECT_TRUE((hdr->nlmsg_flags & NLM_F_MULTI) == NLM_F_MULTI)
+ << std::hex << hdr->nlmsg_flags;
+
+ EXPECT_EQ(hdr->nlmsg_seq, kSeq);
+ EXPECT_EQ(hdr->nlmsg_pid, port);
+
+ if (hdr->nlmsg_type != RTM_NEWADDR) {
+ return;
+ }
+
+ // RTM_NEWADDR contains at least the header and ifaddrmsg.
+ EXPECT_GE(hdr->nlmsg_len, sizeof(*hdr) + sizeof(struct ifaddrmsg));
+
+ // TODO: Check ifaddrmsg contents and following attrs.
+ }));
+}
+
+TEST(NetlinkRouteTest, LookupAll) {
+ struct ifaddrs* if_addr_list = nullptr;
+ auto cleanup = Cleanup([&if_addr_list]() { freeifaddrs(if_addr_list); });
+
+ // Not a syscall but we can use the syscall matcher as glibc sets errno.
+ ASSERT_THAT(getifaddrs(&if_addr_list), SyscallSucceeds());
+
+ int count = 0;
+ for (struct ifaddrs* i = if_addr_list; i; i = i->ifa_next) {
+ if (!i->ifa_addr || (i->ifa_addr->sa_family != AF_INET &&
+ i->ifa_addr->sa_family != AF_INET6)) {
+ continue;
+ }
+ count++;
+ }
+ ASSERT_GT(count, 0);
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_netlink_util.cc b/test/syscalls/linux/socket_netlink_util.cc
new file mode 100644
index 000000000..ee0e03966
--- /dev/null
+++ b/test/syscalls/linux/socket_netlink_util.cc
@@ -0,0 +1,100 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <linux/if_arp.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+
+#include <sys/socket.h>
+#include <vector>
+
+#include "absl/strings/str_cat.h"
+#include "test/syscalls/linux/socket_netlink_util.h"
+#include "test/syscalls/linux/socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+PosixErrorOr<FileDescriptor> NetlinkBoundSocket() {
+ FileDescriptor fd;
+ ASSIGN_OR_RETURN_ERRNO(fd, Socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE));
+
+ struct sockaddr_nl addr = {};
+ addr.nl_family = AF_NETLINK;
+
+ RETURN_ERROR_IF_SYSCALL_FAIL(
+ bind(fd.get(), reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr)));
+ MaybeSave();
+
+ return std::move(fd);
+}
+
+PosixErrorOr<uint32_t> NetlinkPortID(int fd) {
+ struct sockaddr_nl addr;
+ socklen_t addrlen = sizeof(addr);
+
+ RETURN_ERROR_IF_SYSCALL_FAIL(
+ getsockname(fd, reinterpret_cast<struct sockaddr*>(&addr), &addrlen));
+ MaybeSave();
+
+ return static_cast<uint32_t>(addr.nl_pid);
+}
+
+PosixError NetlinkRequestResponse(
+ const FileDescriptor& fd, void* request, size_t len,
+ const std::function<void(const struct nlmsghdr* hdr)>& fn) {
+ struct iovec iov = {};
+ iov.iov_base = request;
+ iov.iov_len = len;
+
+ struct msghdr msg = {};
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ // No destination required; it defaults to pid 0, the kernel.
+
+ RETURN_ERROR_IF_SYSCALL_FAIL(RetryEINTR(sendmsg)(fd.get(), &msg, 0));
+
+ constexpr size_t kBufferSize = 4096;
+ std::vector<char> buf(kBufferSize);
+ iov.iov_base = buf.data();
+ iov.iov_len = buf.size();
+
+ // Response is a series of NLM_F_MULTI messages, ending with a NLMSG_DONE
+ // message.
+ int type = -1;
+ do {
+ int len;
+ RETURN_ERROR_IF_SYSCALL_FAIL(len = RetryEINTR(recvmsg)(fd.get(), &msg, 0));
+
+ // We don't bother with the complexity of dealing with truncated messages.
+ // We must allocate a large enough buffer up front.
+ if ((msg.msg_flags & MSG_TRUNC) == MSG_TRUNC) {
+ return PosixError(EIO,
+ absl::StrCat("Received truncated message with flags: ",
+ msg.msg_flags));
+ }
+
+ for (struct nlmsghdr* hdr = reinterpret_cast<struct nlmsghdr*>(buf.data());
+ NLMSG_OK(hdr, len); hdr = NLMSG_NEXT(hdr, len)) {
+ fn(hdr);
+ type = hdr->nlmsg_type;
+ }
+ } while (type != NLMSG_DONE && type != NLMSG_ERROR);
+
+ EXPECT_EQ(type, NLMSG_DONE);
+ return NoError();
+}
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_netlink_util.h b/test/syscalls/linux/socket_netlink_util.h
new file mode 100644
index 000000000..44b1f148c
--- /dev/null
+++ b/test/syscalls/linux/socket_netlink_util.h
@@ -0,0 +1,42 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_SOCKET_NETLINK_UTIL_H_
+#define GVISOR_TEST_SYSCALLS_SOCKET_NETLINK_UTIL_H_
+
+#include <linux/if_arp.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+
+#include "test/util/file_descriptor.h"
+#include "test/util/posix_error.h"
+
+namespace gvisor {
+namespace testing {
+
+// Returns a bound NETLINK_ROUTE socket.
+PosixErrorOr<FileDescriptor> NetlinkBoundSocket();
+
+// Returns the port ID of the passed socket.
+PosixErrorOr<uint32_t> NetlinkPortID(int fd);
+
+// Send the passed request and call fn will all response netlink messages.
+PosixError NetlinkRequestResponse(
+ const FileDescriptor& fd, void* request, size_t len,
+ const std::function<void(const struct nlmsghdr* hdr)>& fn);
+
+} // namespace testing
+} // namespace gvisor
+
+#endif // GVISOR_TEST_SYSCALLS_SOCKET_NETLINK_UTIL_H_
diff --git a/test/syscalls/linux/socket_non_blocking.cc b/test/syscalls/linux/socket_non_blocking.cc
new file mode 100644
index 000000000..1bcc6fb7f
--- /dev/null
+++ b/test/syscalls/linux/socket_non_blocking.cc
@@ -0,0 +1,63 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_non_blocking.h"
+
+#include <stdio.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+TEST_P(NonBlockingSocketPairTest, ReadNothingAvailable) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ char buf[20] = {};
+ ASSERT_THAT(ReadFd(sockets->first_fd(), buf, sizeof(buf)),
+ SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST_P(NonBlockingSocketPairTest, RecvNothingAvailable) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ char buf[20] = {};
+ ASSERT_THAT(RetryEINTR(recv)(sockets->first_fd(), buf, sizeof(buf), 0),
+ SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST_P(NonBlockingSocketPairTest, RecvMsgNothingAvailable) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ struct iovec iov;
+ char buf[20] = {};
+ iov.iov_base = buf;
+ iov.iov_len = sizeof(buf);
+ struct msghdr msg = {};
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ ASSERT_THAT(RetryEINTR(recvmsg)(sockets->first_fd(), &msg, 0),
+ SyscallFailsWithErrno(EAGAIN));
+}
+
+} // namespace
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_non_blocking.h b/test/syscalls/linux/socket_non_blocking.h
new file mode 100644
index 000000000..287e096bb
--- /dev/null
+++ b/test/syscalls/linux/socket_non_blocking.h
@@ -0,0 +1,29 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_BLOCKING_H_
+#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_BLOCKING_H_
+
+#include "test/syscalls/linux/socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Test fixture for tests that apply to pairs of connected non-blocking sockets.
+using NonBlockingSocketPairTest = SocketPairTest;
+
+} // namespace testing
+} // namespace gvisor
+
+#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_BLOCKING_H_
diff --git a/test/syscalls/linux/socket_non_stream.cc b/test/syscalls/linux/socket_non_stream.cc
new file mode 100644
index 000000000..d49aab363
--- /dev/null
+++ b/test/syscalls/linux/socket_non_stream.cc
@@ -0,0 +1,174 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_non_stream.h"
+
+#include <stdio.h>
+#include <sys/un.h>
+
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+TEST_P(NonStreamSocketPairTest, SendMsgTooLarge) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ int sndbuf;
+ socklen_t length = sizeof(sndbuf);
+ ASSERT_THAT(
+ getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDBUF, &sndbuf, &length),
+ SyscallSucceeds());
+
+ // Make the call too large to fit in the send buffer.
+ const int buffer_size = 3 * sndbuf;
+
+ EXPECT_THAT(SendLargeSendMsg(sockets, buffer_size, false /* reader */),
+ SyscallFailsWithErrno(EMSGSIZE));
+}
+
+// Stream sockets allow data sent with a single (e.g. write, sendmsg) syscall
+// to be read in pieces with multiple (e.g. read, recvmsg) syscalls.
+//
+// SplitRecv checks that control messages can only be read on the first (e.g.
+// read, recvmsg) syscall, even if it doesn't provide space for the control
+// message.
+TEST_P(NonStreamSocketPairTest, SplitRecv) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ char sent_data[512];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+ ASSERT_THAT(
+ RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0),
+ SyscallSucceedsWithValue(sizeof(sent_data)));
+ char received_data[sizeof(sent_data) / 2];
+ ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+ sizeof(received_data), 0),
+ SyscallSucceedsWithValue(sizeof(received_data)));
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(received_data)));
+ ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+ sizeof(received_data), MSG_DONTWAIT),
+ SyscallFailsWithErrno(EWOULDBLOCK));
+}
+
+// Stream sockets allow data sent with multiple sends to be read in a single
+// recv. Datagram sockets do not.
+//
+// SingleRecv checks that only a single message is readable in a single recv.
+TEST_P(NonStreamSocketPairTest, SingleRecv) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ char sent_data1[20];
+ RandomizeBuffer(sent_data1, sizeof(sent_data1));
+ ASSERT_THAT(
+ RetryEINTR(send)(sockets->first_fd(), sent_data1, sizeof(sent_data1), 0),
+ SyscallSucceedsWithValue(sizeof(sent_data1)));
+ char sent_data2[20];
+ RandomizeBuffer(sent_data2, sizeof(sent_data2));
+ ASSERT_THAT(
+ RetryEINTR(send)(sockets->first_fd(), sent_data2, sizeof(sent_data2), 0),
+ SyscallSucceedsWithValue(sizeof(sent_data2)));
+ char received_data[sizeof(sent_data1) + sizeof(sent_data2)];
+ ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+ sizeof(received_data), 0),
+ SyscallSucceedsWithValue(sizeof(sent_data1)));
+ EXPECT_EQ(0, memcmp(sent_data1, received_data, sizeof(sent_data1)));
+}
+
+// Stream sockets allow data sent with multiple sends to be peeked at in a
+// single recv. Datagram sockets (except for unix sockets) do not.
+//
+// SinglePeek checks that only a single message is peekable in a single recv.
+TEST_P(NonStreamSocketPairTest, SinglePeek) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ char sent_data1[20];
+ RandomizeBuffer(sent_data1, sizeof(sent_data1));
+ ASSERT_THAT(
+ RetryEINTR(send)(sockets->first_fd(), sent_data1, sizeof(sent_data1), 0),
+ SyscallSucceedsWithValue(sizeof(sent_data1)));
+ char sent_data2[20];
+ RandomizeBuffer(sent_data2, sizeof(sent_data2));
+ ASSERT_THAT(
+ RetryEINTR(send)(sockets->first_fd(), sent_data2, sizeof(sent_data2), 0),
+ SyscallSucceedsWithValue(sizeof(sent_data2)));
+ char received_data[sizeof(sent_data1) + sizeof(sent_data2)];
+ for (int i = 0; i < 3; i++) {
+ memset(received_data, 0, sizeof(received_data));
+ ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+ sizeof(received_data), MSG_PEEK),
+ SyscallSucceedsWithValue(sizeof(sent_data1)));
+ EXPECT_EQ(0, memcmp(sent_data1, received_data, sizeof(sent_data1)));
+ }
+ ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+ sizeof(sent_data1), 0),
+ SyscallSucceedsWithValue(sizeof(sent_data1)));
+ EXPECT_EQ(0, memcmp(sent_data1, received_data, sizeof(sent_data1)));
+ ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+ sizeof(sent_data2), 0),
+ SyscallSucceedsWithValue(sizeof(sent_data2)));
+ EXPECT_EQ(0, memcmp(sent_data2, received_data, sizeof(sent_data2)));
+}
+
+TEST_P(NonStreamSocketPairTest, MsgTruncTruncation) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ char sent_data[512];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+ ASSERT_THAT(
+ RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0),
+ SyscallSucceedsWithValue(sizeof(sent_data)));
+ char received_data[sizeof(sent_data)] = {};
+ ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+ sizeof(received_data) / 2, MSG_TRUNC),
+ SyscallSucceedsWithValue(sizeof(sent_data)));
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data) / 2));
+
+ // Check that we didn't get any extra data.
+ EXPECT_NE(0, memcmp(sent_data + sizeof(sent_data) / 2,
+ received_data + sizeof(received_data) / 2,
+ sizeof(sent_data) / 2));
+}
+
+TEST_P(NonStreamSocketPairTest, MsgTruncSameSize) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ char sent_data[512];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+ ASSERT_THAT(
+ RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0),
+ SyscallSucceedsWithValue(sizeof(sent_data)));
+ char received_data[sizeof(sent_data)];
+ ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+ sizeof(received_data), MSG_TRUNC),
+ SyscallSucceedsWithValue(sizeof(received_data)));
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+}
+
+TEST_P(NonStreamSocketPairTest, MsgTruncNotFull) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ char sent_data[512];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+ ASSERT_THAT(
+ RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0),
+ SyscallSucceedsWithValue(sizeof(sent_data)));
+ char received_data[2 * sizeof(sent_data)];
+ ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+ sizeof(received_data), MSG_TRUNC),
+ SyscallSucceedsWithValue(sizeof(sent_data)));
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+}
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_non_stream.h b/test/syscalls/linux/socket_non_stream.h
new file mode 100644
index 000000000..02dd2a958
--- /dev/null
+++ b/test/syscalls/linux/socket_non_stream.h
@@ -0,0 +1,29 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_STREAM_H_
+#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_STREAM_H_
+
+#include "test/syscalls/linux/socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Test fixture for tests that apply to pairs of connected non-stream sockets.
+using NonStreamSocketPairTest = SocketPairTest;
+
+} // namespace testing
+} // namespace gvisor
+
+#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_STREAM_H_
diff --git a/test/syscalls/linux/socket_non_stream_blocking.cc b/test/syscalls/linux/socket_non_stream_blocking.cc
new file mode 100644
index 000000000..d64b181c9
--- /dev/null
+++ b/test/syscalls/linux/socket_non_stream_blocking.cc
@@ -0,0 +1,51 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_non_stream_blocking.h"
+
+#include <stdio.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+TEST_P(BlockingNonStreamSocketPairTest, RecvLessThanBufferWaitAll) {
+ SKIP_IF(IsRunningOnGvisor()); // FIXME: Support MSG_WAITALL.
+
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data[100];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ ASSERT_THAT(write(sockets->first_fd(), sent_data, sizeof(sent_data)),
+ SyscallSucceedsWithValue(sizeof(sent_data)));
+
+ char received_data[sizeof(sent_data) * 2] = {};
+ ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+ sizeof(received_data), MSG_WAITALL),
+ SyscallSucceedsWithValue(sizeof(sent_data)));
+}
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_non_stream_blocking.h b/test/syscalls/linux/socket_non_stream_blocking.h
new file mode 100644
index 000000000..bde355452
--- /dev/null
+++ b/test/syscalls/linux/socket_non_stream_blocking.h
@@ -0,0 +1,30 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_STREAM_BLOCKING_H_
+#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_STREAM_BLOCKING_H_
+
+#include "test/syscalls/linux/socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Test fixture for tests that apply to pairs of blocking connected non-stream
+// sockets.
+using BlockingNonStreamSocketPairTest = SocketPairTest;
+
+} // namespace testing
+} // namespace gvisor
+
+#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NON_STREAM_BLOCKING_H_
diff --git a/test/syscalls/linux/socket_stream.cc b/test/syscalls/linux/socket_stream.cc
new file mode 100644
index 000000000..32e9d958b
--- /dev/null
+++ b/test/syscalls/linux/socket_stream.cc
@@ -0,0 +1,99 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_stream.h"
+
+#include <stdio.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "absl/time/clock.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+TEST_P(StreamSocketPairTest, SplitRecv) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ char sent_data[512];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+ ASSERT_THAT(
+ RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0),
+ SyscallSucceedsWithValue(sizeof(sent_data)));
+ char received_data[sizeof(sent_data) / 2];
+ ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+ sizeof(received_data), 0),
+ SyscallSucceedsWithValue(sizeof(received_data)));
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(received_data)));
+ ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+ sizeof(received_data), 0),
+ SyscallSucceedsWithValue(sizeof(received_data)));
+ EXPECT_EQ(0, memcmp(sent_data + sizeof(received_data), received_data,
+ sizeof(received_data)));
+}
+
+// Stream sockets allow data sent with multiple sends to be read in a single
+// recv.
+//
+// CoalescedRecv checks that multiple messages are readable in a single recv.
+TEST_P(StreamSocketPairTest, CoalescedRecv) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ char sent_data1[20];
+ RandomizeBuffer(sent_data1, sizeof(sent_data1));
+ ASSERT_THAT(
+ RetryEINTR(send)(sockets->first_fd(), sent_data1, sizeof(sent_data1), 0),
+ SyscallSucceedsWithValue(sizeof(sent_data1)));
+ char sent_data2[20];
+ RandomizeBuffer(sent_data2, sizeof(sent_data2));
+ ASSERT_THAT(
+ RetryEINTR(send)(sockets->first_fd(), sent_data2, sizeof(sent_data2), 0),
+ SyscallSucceedsWithValue(sizeof(sent_data2)));
+ char received_data[sizeof(sent_data1) + sizeof(sent_data2)];
+ ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+ sizeof(received_data), 0),
+ SyscallSucceedsWithValue(sizeof(received_data)));
+ EXPECT_EQ(0, memcmp(sent_data1, received_data, sizeof(sent_data1)));
+ EXPECT_EQ(0, memcmp(sent_data2, received_data + sizeof(sent_data1),
+ sizeof(sent_data2)));
+}
+
+TEST_P(StreamSocketPairTest, WriteOneSideClosed) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds());
+ const char str[] = "abc";
+ ASSERT_THAT(write(sockets->second_fd(), str, 3),
+ SyscallFailsWithErrno(EPIPE));
+}
+
+TEST_P(StreamSocketPairTest, MsgTrunc) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ char sent_data[512];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+ ASSERT_THAT(
+ RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0),
+ SyscallSucceedsWithValue(sizeof(sent_data)));
+ char received_data[sizeof(sent_data)];
+ ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+ sizeof(received_data) / 2, MSG_TRUNC),
+ SyscallSucceedsWithValue(sizeof(sent_data) / 2));
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data) / 2));
+}
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_stream.h b/test/syscalls/linux/socket_stream.h
new file mode 100644
index 000000000..35e591e17
--- /dev/null
+++ b/test/syscalls/linux/socket_stream.h
@@ -0,0 +1,30 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_H_
+#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_H_
+
+#include "test/syscalls/linux/socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Test fixture for tests that apply to pairs of blocking and non-blocking
+// connected stream sockets.
+using StreamSocketPairTest = SocketPairTest;
+
+} // namespace testing
+} // namespace gvisor
+
+#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_H_
diff --git a/test/syscalls/linux/socket_stream_blocking.cc b/test/syscalls/linux/socket_stream_blocking.cc
new file mode 100644
index 000000000..dd209c67c
--- /dev/null
+++ b/test/syscalls/linux/socket_stream_blocking.cc
@@ -0,0 +1,131 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_stream_blocking.h"
+
+#include <stdio.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+#include "test/util/timer_util.h"
+
+namespace gvisor {
+namespace testing {
+
+TEST_P(BlockingStreamSocketPairTest, BlockPartialWriteClosed) {
+ // FIXME: gVisor doesn't support SO_SNDBUF on UDS, nor does it
+ // enforce any limit; it will write arbitrary amounts of data without
+ // blocking.
+ SKIP_IF(IsRunningOnGvisor());
+
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ int buffer_size;
+ socklen_t length = sizeof(buffer_size);
+ ASSERT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDBUF,
+ &buffer_size, &length),
+ SyscallSucceeds());
+
+ int wfd = sockets->first_fd();
+ ScopedThread t([wfd, buffer_size]() {
+ std::vector<char> buf(2 * buffer_size);
+ // Write more than fits in the buffer. Blocks then returns partial write
+ // when the other end is closed. The next call returns EPIPE.
+ //
+ // N.B. writes occur in chunks, so we may see less than buffer_size from
+ // the first call.
+ ASSERT_THAT(write(wfd, buf.data(), buf.size()),
+ SyscallSucceedsWithValue(::testing::Gt(0)));
+ ASSERT_THAT(write(wfd, buf.data(), buf.size()),
+ ::testing::AnyOf(SyscallFailsWithErrno(EPIPE),
+ SyscallFailsWithErrno(ECONNRESET)));
+ });
+
+ // Leave time for write to become blocked.
+ absl::SleepFor(absl::Seconds(1.0));
+
+ ASSERT_THAT(close(sockets->release_second_fd()), SyscallSucceeds());
+}
+
+TEST_P(BlockingStreamSocketPairTest, SendMsgTooLarge) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ int sndbuf;
+ socklen_t length = sizeof(sndbuf);
+ ASSERT_THAT(
+ getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDBUF, &sndbuf, &length),
+ SyscallSucceeds());
+
+ // Make the call too large to fit in the send buffer.
+ const int buffer_size = 3 * sndbuf;
+
+ EXPECT_THAT(SendLargeSendMsg(sockets, buffer_size, true /* reader */),
+ SyscallSucceedsWithValue(buffer_size));
+}
+
+TEST_P(BlockingStreamSocketPairTest, RecvLessThanBuffer) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data[100];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ ASSERT_THAT(write(sockets->first_fd(), sent_data, sizeof(sent_data)),
+ SyscallSucceedsWithValue(sizeof(sent_data)));
+
+ char received_data[200] = {};
+ ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+ sizeof(received_data), 0),
+ SyscallSucceedsWithValue(sizeof(sent_data)));
+}
+
+TEST_P(BlockingStreamSocketPairTest, RecvLessThanBufferWaitAll) {
+ SKIP_IF(IsRunningOnGvisor()); // FIXME: Support MSG_WAITALL.
+
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data[100];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ ASSERT_THAT(write(sockets->first_fd(), sent_data, sizeof(sent_data)),
+ SyscallSucceedsWithValue(sizeof(sent_data)));
+
+ constexpr auto kDuration = absl::Milliseconds(200);
+ auto before = Now(CLOCK_MONOTONIC);
+
+ const ScopedThread t([&]() {
+ absl::SleepFor(kDuration);
+ ASSERT_THAT(write(sockets->first_fd(), sent_data, sizeof(sent_data)),
+ SyscallSucceedsWithValue(sizeof(sent_data)));
+ });
+
+ char received_data[sizeof(sent_data) * 2] = {};
+ ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+ sizeof(received_data), MSG_WAITALL),
+ SyscallSucceedsWithValue(sizeof(received_data)));
+
+ auto after = Now(CLOCK_MONOTONIC);
+ EXPECT_GE(after - before, kDuration);
+}
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_stream_blocking.h b/test/syscalls/linux/socket_stream_blocking.h
new file mode 100644
index 000000000..06113ad03
--- /dev/null
+++ b/test/syscalls/linux/socket_stream_blocking.h
@@ -0,0 +1,30 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_BLOCKING_H_
+#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_BLOCKING_H_
+
+#include "test/syscalls/linux/socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Test fixture for tests that apply to pairs of blocking connected stream
+// sockets.
+using BlockingStreamSocketPairTest = SocketPairTest;
+
+} // namespace testing
+} // namespace gvisor
+
+#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_BLOCKING_H_
diff --git a/test/syscalls/linux/socket_stream_nonblock.cc b/test/syscalls/linux/socket_stream_nonblock.cc
new file mode 100644
index 000000000..a3202ffe4
--- /dev/null
+++ b/test/syscalls/linux/socket_stream_nonblock.cc
@@ -0,0 +1,50 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_stream_nonblock.h"
+
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/un.h>
+
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+using ::testing::Le;
+
+TEST_P(NonBlockingStreamSocketPairTest, SendMsgTooLarge) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ int sndbuf;
+ socklen_t length = sizeof(sndbuf);
+ ASSERT_THAT(
+ getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDBUF, &sndbuf, &length),
+ SyscallSucceeds());
+
+ // Make the call too large to fit in the send buffer.
+ const int buffer_size = 3 * sndbuf;
+
+ EXPECT_THAT(SendLargeSendMsg(sockets, buffer_size, false /* reader */),
+ SyscallSucceedsWithValue(Le(buffer_size)));
+}
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_stream_nonblock.h b/test/syscalls/linux/socket_stream_nonblock.h
new file mode 100644
index 000000000..491f53848
--- /dev/null
+++ b/test/syscalls/linux/socket_stream_nonblock.h
@@ -0,0 +1,30 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_NONBLOCK_H_
+#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_NONBLOCK_H_
+
+#include "test/syscalls/linux/socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Test fixture for tests that apply to pairs of non-blocking connected stream
+// sockets.
+using NonBlockingStreamSocketPairTest = SocketPairTest;
+
+} // namespace testing
+} // namespace gvisor
+
+#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_STREAM_NONBLOCK_H_
diff --git a/test/syscalls/linux/socket_test_util.cc b/test/syscalls/linux/socket_test_util.cc
new file mode 100644
index 000000000..80a59df7e
--- /dev/null
+++ b/test/syscalls/linux/socket_test_util.cc
@@ -0,0 +1,660 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_test_util.h"
+
+#include <arpa/inet.h>
+#include <poll.h>
+#include <sys/socket.h>
+
+#include "gtest/gtest.h"
+#include "absl/memory/memory.h"
+#include "absl/strings/str_cat.h"
+#include "absl/time/clock.h"
+#include "test/util/posix_error.h"
+#include "test/util/temp_path.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+Creator<SocketPair> SyscallSocketPairCreator(int domain, int type,
+ int protocol) {
+ return [=]() -> PosixErrorOr<std::unique_ptr<FDSocketPair>> {
+ int pair[2];
+ RETURN_ERROR_IF_SYSCALL_FAIL(socketpair(domain, type, protocol, pair));
+ MaybeSave(); // Save on successful creation.
+ return absl::make_unique<FDSocketPair>(pair[0], pair[1]);
+ };
+}
+
+Creator<FileDescriptor> SyscallSocketCreator(int domain, int type,
+ int protocol) {
+ return [=]() -> PosixErrorOr<std::unique_ptr<FileDescriptor>> {
+ int fd = 0;
+ RETURN_ERROR_IF_SYSCALL_FAIL(fd = socket(domain, type, protocol));
+ MaybeSave(); // Save on successful creation.
+ return absl::make_unique<FileDescriptor>(fd);
+ };
+}
+
+PosixErrorOr<struct sockaddr_un> UniqueUnixAddr(bool abstract, int domain) {
+ struct sockaddr_un addr = {};
+ std::string path = NewTempAbsPathInDir("/tmp");
+ if (path.size() >= sizeof(addr.sun_path)) {
+ return PosixError(EINVAL,
+ "Unable to generate a temp path of appropriate length");
+ }
+
+ if (abstract) {
+ // Indicate that the path is in the abstract namespace.
+ path[0] = 0;
+ }
+ memcpy(addr.sun_path, path.c_str(), path.length());
+ addr.sun_family = domain;
+ return addr;
+}
+
+Creator<SocketPair> AcceptBindSocketPairCreator(bool abstract, int domain,
+ int type, int protocol) {
+ return [=]() -> PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> {
+ ASSIGN_OR_RETURN_ERRNO(struct sockaddr_un bind_addr,
+ UniqueUnixAddr(abstract, domain));
+ ASSIGN_OR_RETURN_ERRNO(struct sockaddr_un extra_addr,
+ UniqueUnixAddr(abstract, domain));
+
+ int bound;
+ RETURN_ERROR_IF_SYSCALL_FAIL(bound = socket(domain, type, protocol));
+ MaybeSave(); // Successful socket creation.
+ RETURN_ERROR_IF_SYSCALL_FAIL(
+ bind(bound, reinterpret_cast<struct sockaddr*>(&bind_addr),
+ sizeof(bind_addr)));
+ MaybeSave(); // Successful bind.
+ RETURN_ERROR_IF_SYSCALL_FAIL(listen(bound, /* backlog = */ 5));
+ MaybeSave(); // Successful listen.
+
+ int connected;
+ RETURN_ERROR_IF_SYSCALL_FAIL(connected = socket(domain, type, protocol));
+ MaybeSave(); // Successful socket creation.
+ RETURN_ERROR_IF_SYSCALL_FAIL(
+ connect(connected, reinterpret_cast<struct sockaddr*>(&bind_addr),
+ sizeof(bind_addr)));
+ MaybeSave(); // Successful connect.
+
+ int accepted;
+ RETURN_ERROR_IF_SYSCALL_FAIL(
+ accepted = accept4(bound, nullptr, nullptr,
+ type & (SOCK_NONBLOCK | SOCK_CLOEXEC)));
+ MaybeSave(); // Successful connect.
+
+ // Cleanup no longer needed resources.
+ RETURN_ERROR_IF_SYSCALL_FAIL(close(bound));
+ MaybeSave(); // Dropped original socket.
+
+ // Only unlink if path is not in abstract namespace.
+ if (bind_addr.sun_path[0] != 0) {
+ RETURN_ERROR_IF_SYSCALL_FAIL(unlink(bind_addr.sun_path));
+ MaybeSave(); // Unlinked path.
+ }
+
+ return absl::make_unique<AddrFDSocketPair>(connected, accepted, bind_addr,
+ extra_addr);
+ };
+}
+
+Creator<SocketPair> FilesystemAcceptBindSocketPairCreator(int domain, int type,
+ int protocol) {
+ return AcceptBindSocketPairCreator(/* abstract= */ false, domain, type,
+ protocol);
+}
+
+Creator<SocketPair> AbstractAcceptBindSocketPairCreator(int domain, int type,
+ int protocol) {
+ return AcceptBindSocketPairCreator(/* abstract= */ true, domain, type,
+ protocol);
+}
+
+Creator<SocketPair> BidirectionalBindSocketPairCreator(bool abstract,
+ int domain, int type,
+ int protocol) {
+ return [=]() -> PosixErrorOr<std::unique_ptr<FDSocketPair>> {
+ ASSIGN_OR_RETURN_ERRNO(struct sockaddr_un addr1,
+ UniqueUnixAddr(abstract, domain));
+ ASSIGN_OR_RETURN_ERRNO(struct sockaddr_un addr2,
+ UniqueUnixAddr(abstract, domain));
+
+ int sock1;
+ RETURN_ERROR_IF_SYSCALL_FAIL(sock1 = socket(domain, type, protocol));
+ MaybeSave(); // Successful socket creation.
+ RETURN_ERROR_IF_SYSCALL_FAIL(
+ bind(sock1, reinterpret_cast<struct sockaddr*>(&addr1), sizeof(addr1)));
+ MaybeSave(); // Successful bind.
+
+ int sock2;
+ RETURN_ERROR_IF_SYSCALL_FAIL(sock2 = socket(domain, type, protocol));
+ MaybeSave(); // Successful socket creation.
+ RETURN_ERROR_IF_SYSCALL_FAIL(
+ bind(sock2, reinterpret_cast<struct sockaddr*>(&addr2), sizeof(addr2)));
+ MaybeSave(); // Successful bind.
+
+ RETURN_ERROR_IF_SYSCALL_FAIL(connect(
+ sock1, reinterpret_cast<struct sockaddr*>(&addr2), sizeof(addr2)));
+ MaybeSave(); // Successful connect.
+
+ RETURN_ERROR_IF_SYSCALL_FAIL(connect(
+ sock2, reinterpret_cast<struct sockaddr*>(&addr1), sizeof(addr1)));
+ MaybeSave(); // Successful connect.
+
+ // Cleanup no longer needed resources.
+
+ // Only unlink if path is not in abstract namespace.
+ if (addr1.sun_path[0] != 0) {
+ RETURN_ERROR_IF_SYSCALL_FAIL(unlink(addr1.sun_path));
+ MaybeSave(); // Successful unlink.
+ }
+
+ // Only unlink if path is not in abstract namespace.
+ if (addr2.sun_path[0] != 0) {
+ RETURN_ERROR_IF_SYSCALL_FAIL(unlink(addr2.sun_path));
+ MaybeSave(); // Successful unlink.
+ }
+
+ return absl::make_unique<FDSocketPair>(sock1, sock2);
+ };
+}
+
+Creator<SocketPair> FilesystemBidirectionalBindSocketPairCreator(int domain,
+ int type,
+ int protocol) {
+ return BidirectionalBindSocketPairCreator(/* abstract= */ false, domain, type,
+ protocol);
+}
+
+Creator<SocketPair> AbstractBidirectionalBindSocketPairCreator(int domain,
+ int type,
+ int protocol) {
+ return BidirectionalBindSocketPairCreator(/* abstract= */ true, domain, type,
+ protocol);
+}
+
+Creator<SocketPair> SocketpairGoferSocketPairCreator(int domain, int type,
+ int protocol) {
+ return [=]() -> PosixErrorOr<std::unique_ptr<FDSocketPair>> {
+ struct sockaddr_un addr = {};
+ constexpr char kSocketGoferPath[] = "/socket";
+ memcpy(addr.sun_path, kSocketGoferPath, sizeof(kSocketGoferPath));
+ addr.sun_family = domain;
+
+ int sock1;
+ RETURN_ERROR_IF_SYSCALL_FAIL(sock1 = socket(domain, type, protocol));
+ MaybeSave(); // Successful socket creation.
+ RETURN_ERROR_IF_SYSCALL_FAIL(connect(
+ sock1, reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr)));
+ MaybeSave(); // Successful connect.
+
+ int sock2;
+ RETURN_ERROR_IF_SYSCALL_FAIL(sock2 = socket(domain, type, protocol));
+ MaybeSave(); // Successful socket creation.
+ RETURN_ERROR_IF_SYSCALL_FAIL(connect(
+ sock2, reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr)));
+ MaybeSave(); // Successful connect.
+
+ // Make and close another socketpair to ensure that the duped ends of the
+ // first socketpair get closed.
+ //
+ // The problem is that there is no way to atomically send and close an FD.
+ // The closest that we can do is send and then immediately close the FD,
+ // which is what we do in the gofer. The gofer won't respond to another
+ // request until the reply is sent and the FD is closed, so forcing the
+ // gofer to handle another request will ensure that this has happened.
+ for (int i = 0; i < 2; i++) {
+ int sock;
+ RETURN_ERROR_IF_SYSCALL_FAIL(sock = socket(domain, type, protocol));
+ RETURN_ERROR_IF_SYSCALL_FAIL(connect(
+ sock, reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr)));
+ RETURN_ERROR_IF_SYSCALL_FAIL(close(sock));
+ }
+
+ return absl::make_unique<FDSocketPair>(sock1, sock2);
+ };
+}
+
+Creator<SocketPair> SocketpairGoferFileSocketPairCreator(int flags) {
+ return [=]() -> PosixErrorOr<std::unique_ptr<FDSocketPair>> {
+ constexpr char kSocketGoferPath[] = "/socket";
+
+ int sock1;
+ RETURN_ERROR_IF_SYSCALL_FAIL(sock1 =
+ open(kSocketGoferPath, O_RDWR | flags));
+ MaybeSave(); // Successful socket creation.
+
+ int sock2;
+ RETURN_ERROR_IF_SYSCALL_FAIL(sock2 =
+ open(kSocketGoferPath, O_RDWR | flags));
+ MaybeSave(); // Successful socket creation.
+
+ return absl::make_unique<FDSocketPair>(sock1, sock2);
+ };
+}
+
+Creator<SocketPair> UnboundSocketPairCreator(bool abstract, int domain,
+ int type, int protocol) {
+ return [=]() -> PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> {
+ ASSIGN_OR_RETURN_ERRNO(struct sockaddr_un addr1,
+ UniqueUnixAddr(abstract, domain));
+ ASSIGN_OR_RETURN_ERRNO(struct sockaddr_un addr2,
+ UniqueUnixAddr(abstract, domain));
+
+ int sock1;
+ RETURN_ERROR_IF_SYSCALL_FAIL(sock1 = socket(domain, type, protocol));
+ MaybeSave(); // Successful socket creation.
+ int sock2;
+ RETURN_ERROR_IF_SYSCALL_FAIL(sock2 = socket(domain, type, protocol));
+ MaybeSave(); // Successful socket creation.
+ return absl::make_unique<AddrFDSocketPair>(sock1, sock2, addr1, addr2);
+ };
+}
+
+Creator<SocketPair> FilesystemUnboundSocketPairCreator(int domain, int type,
+ int protocol) {
+ return UnboundSocketPairCreator(/* abstract= */ false, domain, type,
+ protocol);
+}
+
+Creator<SocketPair> AbstractUnboundSocketPairCreator(int domain, int type,
+ int protocol) {
+ return UnboundSocketPairCreator(/* abstract= */ true, domain, type, protocol);
+}
+
+void LocalhostAddr(struct sockaddr_in* addr, bool dual_stack) {
+ addr->sin_family = AF_INET;
+ addr->sin_port = htons(0);
+ inet_pton(AF_INET, "127.0.0.1",
+ reinterpret_cast<void*>(&addr->sin_addr.s_addr));
+}
+
+void LocalhostAddr(struct sockaddr_in6* addr, bool dual_stack) {
+ addr->sin6_family = AF_INET6;
+ addr->sin6_port = htons(0);
+ if (dual_stack) {
+ inet_pton(AF_INET6, "::ffff:127.0.0.1",
+ reinterpret_cast<void*>(&addr->sin6_addr.s6_addr));
+ } else {
+ inet_pton(AF_INET6, "::1",
+ reinterpret_cast<void*>(&addr->sin6_addr.s6_addr));
+ }
+ addr->sin6_scope_id = 0;
+}
+
+template <typename T>
+PosixErrorOr<T> BindIP(int fd, bool dual_stack) {
+ T addr = {};
+ LocalhostAddr(&addr, dual_stack);
+ RETURN_ERROR_IF_SYSCALL_FAIL(
+ bind(fd, reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr)));
+ socklen_t addrlen = sizeof(addr);
+ RETURN_ERROR_IF_SYSCALL_FAIL(
+ getsockname(fd, reinterpret_cast<struct sockaddr*>(&addr), &addrlen));
+ return addr;
+}
+
+template <typename T>
+PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> CreateTCPAcceptBindSocketPair(
+ int bound, int connected, int type, bool dual_stack) {
+ ASSIGN_OR_RETURN_ERRNO(T bind_addr, BindIP<T>(bound, dual_stack));
+ RETURN_ERROR_IF_SYSCALL_FAIL(listen(bound, /* backlog = */ 5));
+
+ int connect_result = 0;
+ RETURN_ERROR_IF_SYSCALL_FAIL(
+ (connect_result = RetryEINTR(connect)(
+ connected, reinterpret_cast<struct sockaddr*>(&bind_addr),
+ sizeof(bind_addr))) == -1 &&
+ errno == EINPROGRESS
+ ? 0
+ : connect_result);
+ MaybeSave(); // Successful connect.
+
+ if (connect_result == -1) {
+ struct pollfd connect_poll = {connected, POLLOUT | POLLERR | POLLHUP, 0};
+ RETURN_ERROR_IF_SYSCALL_FAIL(RetryEINTR(poll)(&connect_poll, 1, 0));
+ int error = 0;
+ socklen_t errorlen = sizeof(error);
+ RETURN_ERROR_IF_SYSCALL_FAIL(
+ getsockopt(connected, SOL_SOCKET, SO_ERROR, &error, &errorlen));
+ errno = error;
+ RETURN_ERROR_IF_SYSCALL_FAIL(
+ /* connect */ error == 0 ? 0 : -1);
+ }
+
+ int accepted = -1;
+ struct pollfd accept_poll = {bound, POLLIN, 0};
+ while (accepted == -1) {
+ RETURN_ERROR_IF_SYSCALL_FAIL(RetryEINTR(poll)(&accept_poll, 1, 0));
+
+ RETURN_ERROR_IF_SYSCALL_FAIL(
+ (accepted = RetryEINTR(accept4)(
+ bound, nullptr, nullptr, type & (SOCK_NONBLOCK | SOCK_CLOEXEC))) ==
+ -1 &&
+ errno == EAGAIN
+ ? 0
+ : accepted);
+ }
+ MaybeSave(); // Successful accept.
+
+ // FIXME
+ if (connect_result == -1) {
+ absl::SleepFor(absl::Seconds(1));
+ }
+
+ // Cleanup no longer needed resources.
+ RETURN_ERROR_IF_SYSCALL_FAIL(close(bound));
+ MaybeSave(); // Successful close.
+
+ T extra_addr = {};
+ LocalhostAddr(&extra_addr, dual_stack);
+ return absl::make_unique<AddrFDSocketPair>(connected, accepted, bind_addr,
+ extra_addr);
+}
+
+Creator<SocketPair> TCPAcceptBindSocketPairCreator(int domain, int type,
+ int protocol,
+ bool dual_stack) {
+ return [=]() -> PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> {
+ int bound;
+ RETURN_ERROR_IF_SYSCALL_FAIL(bound = socket(domain, type, protocol));
+ MaybeSave(); // Successful socket creation.
+
+ int connected;
+ RETURN_ERROR_IF_SYSCALL_FAIL(connected = socket(domain, type, protocol));
+ MaybeSave(); // Successful socket creation.
+
+ if (domain == AF_INET) {
+ return CreateTCPAcceptBindSocketPair<sockaddr_in>(bound, connected, type,
+ dual_stack);
+ }
+ return CreateTCPAcceptBindSocketPair<sockaddr_in6>(bound, connected, type,
+ dual_stack);
+ };
+}
+
+template <typename T>
+PosixErrorOr<std::unique_ptr<AddrFDSocketPair>>
+CreateUDPBidirectionalBindSocketPair(int sock1, int sock2, int type,
+ bool dual_stack) {
+ ASSIGN_OR_RETURN_ERRNO(T addr1, BindIP<T>(sock1, dual_stack));
+ ASSIGN_OR_RETURN_ERRNO(T addr2, BindIP<T>(sock2, dual_stack));
+
+ // Connect sock1 to sock2.
+ RETURN_ERROR_IF_SYSCALL_FAIL(connect(
+ sock1, reinterpret_cast<struct sockaddr*>(&addr2), sizeof(addr2)));
+ MaybeSave(); // Successful connection.
+
+ // Connect sock2 to sock1.
+ RETURN_ERROR_IF_SYSCALL_FAIL(connect(
+ sock2, reinterpret_cast<struct sockaddr*>(&addr1), sizeof(addr1)));
+ MaybeSave(); // Successful connection.
+
+ return absl::make_unique<AddrFDSocketPair>(sock1, sock2, addr1, addr2);
+}
+
+Creator<SocketPair> UDPBidirectionalBindSocketPairCreator(int domain, int type,
+ int protocol,
+ bool dual_stack) {
+ return [=]() -> PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> {
+ int sock1;
+ RETURN_ERROR_IF_SYSCALL_FAIL(sock1 = socket(domain, type, protocol));
+ MaybeSave(); // Successful socket creation.
+
+ int sock2;
+ RETURN_ERROR_IF_SYSCALL_FAIL(sock2 = socket(domain, type, protocol));
+ MaybeSave(); // Successful socket creation.
+
+ if (domain == AF_INET) {
+ return CreateUDPBidirectionalBindSocketPair<sockaddr_in>(
+ sock1, sock2, type, dual_stack);
+ }
+ return CreateUDPBidirectionalBindSocketPair<sockaddr_in6>(sock1, sock2,
+ type, dual_stack);
+ };
+}
+
+SocketPairKind Reversed(SocketPairKind const& base) {
+ auto const& creator = base.creator;
+ return SocketPairKind{
+ absl::StrCat("reversed ", base.description),
+ [creator]() -> PosixErrorOr<std::unique_ptr<ReversedSocketPair>> {
+ ASSIGN_OR_RETURN_ERRNO(auto creator_value, creator());
+ return absl::make_unique<ReversedSocketPair>(std::move(creator_value));
+ }};
+}
+
+std::vector<SocketPairKind> IncludeReversals(std::vector<SocketPairKind> vec) {
+ return ApplyVecToVec<SocketPairKind>(std::vector<Middleware>{NoOp, Reversed},
+ vec);
+}
+
+SocketPairKind NoOp(SocketPairKind const& base) { return base; }
+
+void TransferTest(int fd1, int fd2) {
+ char buf1[20];
+ RandomizeBuffer(buf1, sizeof(buf1));
+ ASSERT_THAT(WriteFd(fd1, buf1, sizeof(buf1)),
+ SyscallSucceedsWithValue(sizeof(buf1)));
+
+ char buf2[20];
+ ASSERT_THAT(ReadFd(fd2, buf2, sizeof(buf2)),
+ SyscallSucceedsWithValue(sizeof(buf2)));
+
+ EXPECT_EQ(0, memcmp(buf1, buf2, sizeof(buf1)));
+
+ RandomizeBuffer(buf1, sizeof(buf1));
+ ASSERT_THAT(WriteFd(fd2, buf1, sizeof(buf1)),
+ SyscallSucceedsWithValue(sizeof(buf1)));
+
+ ASSERT_THAT(ReadFd(fd1, buf2, sizeof(buf2)),
+ SyscallSucceedsWithValue(sizeof(buf2)));
+
+ EXPECT_EQ(0, memcmp(buf1, buf2, sizeof(buf1)));
+}
+
+// Initializes the given buffer with random data.
+void RandomizeBuffer(char* ptr, size_t len) {
+ uint32_t seed = time(nullptr);
+ for (size_t i = 0; i < len; ++i) {
+ ptr[i] = static_cast<char>(rand_r(&seed));
+ }
+}
+
+size_t CalculateUnixSockAddrLen(const char* sun_path) {
+ // Abstract addresses always return the full length.
+ if (sun_path[0] == 0) {
+ return sizeof(sockaddr_un);
+ }
+ // Filesystem addresses use the address length plus the 2 byte sun_family and
+ // null terminator.
+ return strlen(sun_path) + 3;
+}
+
+struct sockaddr_storage AddrFDSocketPair::to_storage(const sockaddr_un& addr) {
+ struct sockaddr_storage addr_storage = {};
+ memcpy(&addr_storage, &addr, sizeof(addr));
+ return addr_storage;
+}
+
+struct sockaddr_storage AddrFDSocketPair::to_storage(const sockaddr_in& addr) {
+ struct sockaddr_storage addr_storage = {};
+ memcpy(&addr_storage, &addr, sizeof(addr));
+ return addr_storage;
+}
+
+struct sockaddr_storage AddrFDSocketPair::to_storage(const sockaddr_in6& addr) {
+ struct sockaddr_storage addr_storage = {};
+ memcpy(&addr_storage, &addr, sizeof(addr));
+ return addr_storage;
+}
+
+SocketKind SimpleSocket(int fam, int type, int proto) {
+ return SocketKind{
+ absl::StrCat("Family ", fam, ", type ", type, ", proto ", proto),
+ SyscallSocketCreator(fam, type, proto)};
+}
+
+ssize_t SendLargeSendMsg(const std::unique_ptr<SocketPair>& sockets,
+ size_t size, bool reader) {
+ const int rfd = sockets->second_fd();
+ ScopedThread t([rfd, size, reader] {
+ if (!reader) {
+ return;
+ }
+
+ // Potentially too many syscalls in the loop.
+ const DisableSave ds;
+
+ std::vector<char> buf(size);
+ size_t total = 0;
+
+ while (total < size) {
+ int ret = read(rfd, buf.data(), buf.size());
+ if (ret == -1 && errno == EAGAIN) {
+ continue;
+ }
+ if (ret > 0) {
+ total += ret;
+ }
+
+ // Assert to return on first failure.
+ ASSERT_THAT(ret, SyscallSucceeds());
+ }
+ });
+
+ std::vector<char> buf(size);
+
+ struct iovec iov = {};
+ iov.iov_base = buf.data();
+ iov.iov_len = buf.size();
+
+ struct msghdr msg = {};
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ return RetryEINTR(sendmsg)(sockets->first_fd(), &msg, 0);
+}
+
+PosixErrorOr<int> PortAvailable(int port, AddressFamily family, SocketType type,
+ bool reuse_addr) {
+ if (port < 0) {
+ return PosixError(EINVAL, "Invalid port");
+ }
+
+ // Both Ipv6 and Dualstack are AF_INET6.
+ int sock_fam = (family == AddressFamily::kIpv4 ? AF_INET : AF_INET6);
+ int sock_type = (type == SocketType::kTcp ? SOCK_STREAM : SOCK_DGRAM);
+ ASSIGN_OR_RETURN_ERRNO(auto fd, Socket(sock_fam, sock_type, 0));
+
+ if (reuse_addr) {
+ int one = 1;
+ RETURN_ERROR_IF_SYSCALL_FAIL(
+ setsockopt(fd.get(), SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)));
+ }
+
+ // Try to bind.
+ sockaddr_storage storage = {};
+ int storage_size = 0;
+ if (family == AddressFamily::kIpv4) {
+ sockaddr_in* addr = reinterpret_cast<sockaddr_in*>(&storage);
+ storage_size = sizeof(*addr);
+ addr->sin_family = AF_INET;
+ addr->sin_port = htons(port);
+ addr->sin_addr.s_addr = htonl(INADDR_ANY);
+ } else {
+ sockaddr_in6* addr = reinterpret_cast<sockaddr_in6*>(&storage);
+ storage_size = sizeof(*addr);
+ addr->sin6_family = AF_INET6;
+ addr->sin6_port = htons(port);
+ if (family == AddressFamily::kDualStack) {
+ inet_pton(AF_INET6, "::ffff:0.0.0.0",
+ reinterpret_cast<void*>(&addr->sin6_addr.s6_addr));
+ } else {
+ addr->sin6_addr = in6addr_any;
+ }
+ }
+
+ RETURN_ERROR_IF_SYSCALL_FAIL(
+ bind(fd.get(), reinterpret_cast<sockaddr*>(&storage), storage_size));
+
+ // If the user specified 0 as the port, we will return the port that the
+ // kernel gave us, otherwise we will validate that this socket bound to the
+ // requested port.
+ sockaddr_storage bound_storage = {};
+ socklen_t bound_storage_size = sizeof(bound_storage);
+ RETURN_ERROR_IF_SYSCALL_FAIL(
+ getsockname(fd.get(), reinterpret_cast<sockaddr*>(&bound_storage),
+ &bound_storage_size));
+
+ int available_port = -1;
+ if (bound_storage.ss_family == AF_INET) {
+ sockaddr_in* addr = reinterpret_cast<sockaddr_in*>(&bound_storage);
+ available_port = ntohs(addr->sin_port);
+ } else if (bound_storage.ss_family == AF_INET6) {
+ sockaddr_in6* addr = reinterpret_cast<sockaddr_in6*>(&bound_storage);
+ available_port = ntohs(addr->sin6_port);
+ } else {
+ return PosixError(EPROTOTYPE, "Getsockname returned invalid family");
+ }
+
+ // If we requested a specific port make sure our bound port is that port.
+ if (port != 0 && available_port != port) {
+ return PosixError(EINVAL,
+ absl::StrCat("Bound port ", available_port,
+ " was not equal to requested port ", port));
+ }
+
+ // If we're trying to do a TCP socket, let's also try to listen.
+ if (type == SocketType::kTcp) {
+ RETURN_ERROR_IF_SYSCALL_FAIL(listen(fd.get(), 1));
+ }
+
+ return available_port;
+}
+
+PosixError FreeAvailablePort(int port) {
+ return NoError();
+}
+
+PosixErrorOr<int> SendMsg(int sock, msghdr* msg, char buf[], int buf_size) {
+ struct iovec iov;
+ iov.iov_base = buf;
+ iov.iov_len = buf_size;
+ msg->msg_iov = &iov;
+ msg->msg_iovlen = 1;
+
+ int ret;
+ RETURN_ERROR_IF_SYSCALL_FAIL(ret = RetryEINTR(sendmsg)(sock, msg, 0));
+ return ret;
+}
+
+void RecvNoData(int sock) {
+ char data = 0;
+ struct iovec iov;
+ iov.iov_base = &data;
+ iov.iov_len = 1;
+ struct msghdr msg = {};
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ ASSERT_THAT(RetryEINTR(recvmsg)(sock, &msg, MSG_DONTWAIT),
+ SyscallFailsWithErrno(EAGAIN));
+}
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_test_util.h b/test/syscalls/linux/socket_test_util.h
new file mode 100644
index 000000000..e3e741478
--- /dev/null
+++ b/test/syscalls/linux/socket_test_util.h
@@ -0,0 +1,449 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_SOCKET_TEST_UTIL_H_
+#define GVISOR_TEST_SYSCALLS_SOCKET_TEST_UTIL_H_
+
+#include <errno.h>
+#include <netinet/ip.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <functional>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "absl/strings/str_format.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/posix_error.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Wrapper for socket(2) that returns a FileDescriptor.
+inline PosixErrorOr<FileDescriptor> Socket(int family, int type, int protocol) {
+ int fd = socket(family, type, protocol);
+ MaybeSave();
+ if (fd < 0) {
+ return PosixError(
+ errno, absl::StrFormat("socket(%d, %d, %d)", family, type, protocol));
+ }
+ return FileDescriptor(fd);
+}
+
+// Wrapper for accept(2) that returns a FileDescriptor.
+inline PosixErrorOr<FileDescriptor> Accept(int sockfd, sockaddr* addr,
+ socklen_t* addrlen) {
+ int fd = RetryEINTR(accept)(sockfd, addr, addrlen);
+ MaybeSave();
+ if (fd < 0) {
+ return PosixError(
+ errno, absl::StrFormat("accept(%d, %p, %p)", sockfd, addr, addrlen));
+ }
+ return FileDescriptor(fd);
+}
+
+// Wrapper for accept4(2) that returns a FileDescriptor.
+inline PosixErrorOr<FileDescriptor> Accept4(int sockfd, sockaddr* addr,
+ socklen_t* addrlen, int flags) {
+ int fd = RetryEINTR(accept4)(sockfd, addr, addrlen, flags);
+ MaybeSave();
+ if (fd < 0) {
+ return PosixError(errno, absl::StrFormat("accept4(%d, %p, %p, %#x)", sockfd,
+ addr, addrlen, flags));
+ }
+ return FileDescriptor(fd);
+}
+
+inline ssize_t SendFd(int fd, void* buf, size_t count, int flags) {
+ return internal::ApplyFileIoSyscall(
+ [&](size_t completed) {
+ return sendto(fd, static_cast<char*>(buf) + completed,
+ count - completed, flags, nullptr, 0);
+ },
+ count);
+}
+
+// A Creator<T> is a function that attempts to create and return a new T. (This
+// is copy/pasted from cloud/gvisor/api/sandbox_util.h and is just duplicated
+// here for clarity.)
+template <typename T>
+using Creator = std::function<PosixErrorOr<std::unique_ptr<T>>()>;
+
+// A SocketPair represents a pair of socket file descriptors owned by the
+// SocketPair.
+class SocketPair {
+ public:
+ virtual ~SocketPair() = default;
+
+ virtual int first_fd() const = 0;
+ virtual int second_fd() const = 0;
+ virtual int release_first_fd() = 0;
+ virtual int release_second_fd() = 0;
+ virtual const struct sockaddr* first_addr() const = 0;
+ virtual const struct sockaddr* second_addr() const = 0;
+ virtual size_t first_addr_size() const = 0;
+ virtual size_t second_addr_size() const = 0;
+ virtual size_t first_addr_len() const = 0;
+ virtual size_t second_addr_len() const = 0;
+};
+
+// A FDSocketPair is a SocketPair that consists of only a pair of file
+// descriptors.
+class FDSocketPair : public SocketPair {
+ public:
+ FDSocketPair(int first_fd, int second_fd)
+ : first_(first_fd), second_(second_fd) {}
+
+ int first_fd() const override { return first_.get(); }
+ int second_fd() const override { return second_.get(); }
+ int release_first_fd() override { return first_.release(); }
+ int release_second_fd() override { return second_.release(); }
+ const struct sockaddr* first_addr() const override { return nullptr; }
+ const struct sockaddr* second_addr() const override { return nullptr; }
+ size_t first_addr_size() const override { return 0; }
+ size_t second_addr_size() const override { return 0; }
+ size_t first_addr_len() const override { return 0; }
+ size_t second_addr_len() const override { return 0; }
+
+ private:
+ FileDescriptor first_;
+ FileDescriptor second_;
+};
+
+// CalculateUnixSockAddrLen calculates the length returned by recvfrom(2) and
+// recvmsg(2) for Unix sockets.
+size_t CalculateUnixSockAddrLen(const char* sun_path);
+
+// A AddrFDSocketPair is a SocketPair that consists of a pair of file
+// descriptors in addition to a pair of socket addresses.
+class AddrFDSocketPair : public SocketPair {
+ public:
+ AddrFDSocketPair(int first_fd, int second_fd,
+ const struct sockaddr_un& first_address,
+ const struct sockaddr_un& second_address)
+ : first_(first_fd),
+ second_(second_fd),
+ first_addr_(to_storage(first_address)),
+ second_addr_(to_storage(second_address)),
+ first_len_(CalculateUnixSockAddrLen(first_address.sun_path)),
+ second_len_(CalculateUnixSockAddrLen(second_address.sun_path)),
+ first_size_(sizeof(first_address)),
+ second_size_(sizeof(second_address)) {}
+
+ AddrFDSocketPair(int first_fd, int second_fd,
+ const struct sockaddr_in& first_address,
+ const struct sockaddr_in& second_address)
+ : first_(first_fd),
+ second_(second_fd),
+ first_addr_(to_storage(first_address)),
+ second_addr_(to_storage(second_address)),
+ first_len_(sizeof(first_address)),
+ second_len_(sizeof(second_address)),
+ first_size_(sizeof(first_address)),
+ second_size_(sizeof(second_address)) {}
+
+ AddrFDSocketPair(int first_fd, int second_fd,
+ const struct sockaddr_in6& first_address,
+ const struct sockaddr_in6& second_address)
+ : first_(first_fd),
+ second_(second_fd),
+ first_addr_(to_storage(first_address)),
+ second_addr_(to_storage(second_address)),
+ first_len_(sizeof(first_address)),
+ second_len_(sizeof(second_address)),
+ first_size_(sizeof(first_address)),
+ second_size_(sizeof(second_address)) {}
+
+ int first_fd() const override { return first_.get(); }
+ int second_fd() const override { return second_.get(); }
+ int release_first_fd() override { return first_.release(); }
+ int release_second_fd() override { return second_.release(); }
+ const struct sockaddr* first_addr() const override {
+ return reinterpret_cast<const struct sockaddr*>(&first_addr_);
+ }
+ const struct sockaddr* second_addr() const override {
+ return reinterpret_cast<const struct sockaddr*>(&second_addr_);
+ }
+ size_t first_addr_size() const override { return first_size_; }
+ size_t second_addr_size() const override { return second_size_; }
+ size_t first_addr_len() const override { return first_len_; }
+ size_t second_addr_len() const override { return second_len_; }
+
+ private:
+ // to_storage coverts a sockaddr_* to a sockaddr_storage.
+ static struct sockaddr_storage to_storage(const sockaddr_un& addr);
+ static struct sockaddr_storage to_storage(const sockaddr_in& addr);
+ static struct sockaddr_storage to_storage(const sockaddr_in6& addr);
+
+ FileDescriptor first_;
+ FileDescriptor second_;
+ const struct sockaddr_storage first_addr_;
+ const struct sockaddr_storage second_addr_;
+ const size_t first_len_;
+ const size_t second_len_;
+ const size_t first_size_;
+ const size_t second_size_;
+};
+
+// SyscallSocketPairCreator returns a Creator<SocketPair> that obtains file
+// descriptors by invoking the socketpair() syscall.
+Creator<SocketPair> SyscallSocketPairCreator(int domain, int type,
+ int protocol);
+
+// SyscallSocketCreator returns a Creator<FileDescriptor> that obtains a file
+// descriptor by invoking the socket() syscall.
+Creator<FileDescriptor> SyscallSocketCreator(int domain, int type,
+ int protocol);
+
+// FilesystemBidirectionalBindSocketPairCreator returns a Creator<SocketPair>
+// that obtains file descriptors by invoking the bind() and connect() syscalls
+// on filesystem paths. Only works for DGRAM sockets.
+Creator<SocketPair> FilesystemBidirectionalBindSocketPairCreator(int domain,
+ int type,
+ int protocol);
+
+// AbstractBidirectionalBindSocketPairCreator returns a Creator<SocketPair> that
+// obtains file descriptors by invoking the bind() and connect() syscalls on
+// abstract namespace paths. Only works for DGRAM sockets.
+Creator<SocketPair> AbstractBidirectionalBindSocketPairCreator(int domain,
+ int type,
+ int protocol);
+
+// SocketpairGoferSocketPairCreator returns a Creator<SocketPair> that
+// obtains file descriptors by connect() syscalls on two sockets with socketpair
+// gofer paths.
+Creator<SocketPair> SocketpairGoferSocketPairCreator(int domain, int type,
+ int protocol);
+
+// SocketpairGoferFileSocketPairCreator returns a Creator<SocketPair> that
+// obtains file descriptors by open() syscalls on socketpair gofer paths.
+Creator<SocketPair> SocketpairGoferFileSocketPairCreator(int flags);
+
+// FilesystemAcceptBindSocketPairCreator returns a Creator<SocketPair> that
+// obtains file descriptors by invoking the accept() and bind() syscalls on
+// a filesystem path. Only works for STREAM and SEQPACKET sockets.
+Creator<SocketPair> FilesystemAcceptBindSocketPairCreator(int domain, int type,
+ int protocol);
+
+// AbstractAcceptBindSocketPairCreator returns a Creator<SocketPair> that
+// obtains file descriptors by invoking the accept() and bind() syscalls on a
+// abstract namespace path. Only works for STREAM and SEQPACKET sockets.
+Creator<SocketPair> AbstractAcceptBindSocketPairCreator(int domain, int type,
+ int protocol);
+
+// FilesystemUnboundSocketPairCreator returns a Creator<SocketPair> that obtains
+// file descriptors by invoking the socket() syscall and generates a filesystem
+// path for binding.
+Creator<SocketPair> FilesystemUnboundSocketPairCreator(int domain, int type,
+ int protocol);
+
+// AbstractUnboundSocketPairCreator returns a Creator<SocketPair> that obtains
+// file descriptors by invoking the socket() syscall and generates an abstract
+// path for binding.
+Creator<SocketPair> AbstractUnboundSocketPairCreator(int domain, int type,
+ int protocol);
+
+// TCPAcceptBindSocketPairCreator returns a Creator<SocketPair> that obtains
+// file descriptors by invoking the accept() and bind() syscalls on TCP sockets.
+Creator<SocketPair> TCPAcceptBindSocketPairCreator(int domain, int type,
+ int protocol,
+ bool dual_stack);
+
+// UDPBidirectionalBindSocketPairCreator returns a Creator<SocketPair> that
+// obtains file descriptors by invoking the bind() and connect() syscalls on UDP
+// sockets.
+Creator<SocketPair> UDPBidirectionalBindSocketPairCreator(int domain, int type,
+ int protocol,
+ bool dual_stack);
+
+// A SocketPairKind couples a human-readable description of a socket pair with
+// a function that creates such a socket pair.
+struct SocketPairKind {
+ std::string description;
+ Creator<SocketPair> creator;
+
+ // Create creates a socket pair of this kind.
+ PosixErrorOr<std::unique_ptr<SocketPair>> Create() const { return creator(); }
+};
+
+// A SocketKind couples a human-readable description of a socket with
+// a function that creates such a socket.
+struct SocketKind {
+ std::string description;
+ Creator<FileDescriptor> creator;
+
+ // Create creates a socket pair of this kind.
+ PosixErrorOr<std::unique_ptr<FileDescriptor>> Create() const {
+ return creator();
+ }
+};
+
+// A ReversedSocketPair wraps another SocketPair but flips the first and second
+// file descriptors. ReversedSocketPair is used to test socket pairs that
+// should be symmetric.
+class ReversedSocketPair : public SocketPair {
+ public:
+ explicit ReversedSocketPair(std::unique_ptr<SocketPair> base)
+ : base_(std::move(base)) {}
+
+ int first_fd() const override { return base_->second_fd(); }
+ int second_fd() const override { return base_->first_fd(); }
+ int release_first_fd() override { return base_->release_second_fd(); }
+ int release_second_fd() override { return base_->release_first_fd(); }
+ const struct sockaddr* first_addr() const override {
+ return base_->second_addr();
+ }
+ const struct sockaddr* second_addr() const override {
+ return base_->first_addr();
+ }
+ size_t first_addr_size() const override { return base_->second_addr_size(); }
+ size_t second_addr_size() const override { return base_->first_addr_size(); }
+ size_t first_addr_len() const override { return base_->second_addr_len(); }
+ size_t second_addr_len() const override { return base_->first_addr_len(); }
+
+ private:
+ std::unique_ptr<SocketPair> base_;
+};
+
+// Reversed returns a SocketPairKind that represents SocketPairs created by
+// flipping the file descriptors provided by another SocketPair.
+SocketPairKind Reversed(SocketPairKind const& base);
+
+// IncludeReversals returns a vector<SocketPairKind> that returns all
+// SocketPairKinds in `vec` as well as all SocketPairKinds obtained by flipping
+// the file descriptors provided by the kinds in `vec`.
+std::vector<SocketPairKind> IncludeReversals(std::vector<SocketPairKind> vec);
+
+// A Middleware is a function wraps a SocketPairKind.
+using Middleware = std::function<SocketPairKind(SocketPairKind)>;
+
+// Reversed returns a SocketPairKind that represents SocketPairs created by
+// flipping the file descriptors provided by another SocketPair.
+template <typename T>
+Middleware SetSockOpt(int level, int optname, T* value) {
+ return [=](SocketPairKind const& base) {
+ auto const& creator = base.creator;
+ return SocketPairKind{
+ absl::StrCat("setsockopt(", level, ", ", optname, ", ", *value, ") ",
+ base.description),
+ [creator, level, optname,
+ value]() -> PosixErrorOr<std::unique_ptr<SocketPair>> {
+ ASSIGN_OR_RETURN_ERRNO(auto creator_value, creator());
+ if (creator_value->first_fd() >= 0) {
+ RETURN_ERROR_IF_SYSCALL_FAIL(setsockopt(
+ creator_value->first_fd(), level, optname, value, sizeof(T)));
+ }
+ if (creator_value->second_fd() >= 0) {
+ RETURN_ERROR_IF_SYSCALL_FAIL(setsockopt(
+ creator_value->second_fd(), level, optname, value, sizeof(T)));
+ }
+ return creator_value;
+ }};
+ };
+}
+
+constexpr int kSockOptOn = 1;
+constexpr int kSockOptOff = 0;
+
+// NoOp returns the same SocketPairKind that it is passed.
+SocketPairKind NoOp(SocketPairKind const& base);
+
+// TransferTest tests that data can be send back and fourth between two
+// specified FDs. Note that calls to this function should be wrapped in
+// ASSERT_NO_FATAL_FAILURE().
+void TransferTest(int fd1, int fd2);
+
+// Fills [buf, buf+len) with random bytes.
+void RandomizeBuffer(char* buf, size_t len);
+
+// Base test fixture for tests that operate on pairs of connected sockets.
+class SocketPairTest : public ::testing::TestWithParam<SocketPairKind> {
+ protected:
+ SocketPairTest() {
+ // gUnit uses printf, so so will we.
+ printf("Testing with %s\n", GetParam().description.c_str());
+ }
+
+ PosixErrorOr<std::unique_ptr<SocketPair>> NewSocketPair() const {
+ return GetParam().Create();
+ }
+};
+
+// Base test fixture for tests that operate on simple Sockets.
+class SimpleSocketTest : public ::testing::TestWithParam<SocketKind> {
+ protected:
+ SimpleSocketTest() {
+ // gUnit uses printf, so so will we.
+ printf("Testing with %s\n", GetParam().description.c_str());
+ }
+
+ PosixErrorOr<std::unique_ptr<FileDescriptor>> NewSocket() const {
+ return GetParam().Create();
+ }
+};
+
+SocketKind SimpleSocket(int fam, int type, int proto);
+
+// Send a buffer of size 'size' to sockets->first_fd(), returning the result of
+// sendmsg.
+//
+// If reader, read from second_fd() until size bytes have been read.
+ssize_t SendLargeSendMsg(const std::unique_ptr<SocketPair>& sockets,
+ size_t size, bool reader);
+
+// Initializes the given buffer with random data.
+void RandomizeBuffer(char* ptr, size_t len);
+
+enum class AddressFamily { kIpv4 = 1, kIpv6 = 2, kDualStack = 3 };
+enum class SocketType { kUdp = 1, kTcp = 2 };
+
+// Returns a PosixError or a port that is available. If 0 is specified as the
+// port it will bind port 0 (and allow the kernel to select any free port).
+// Otherwise, it will try to bind the specified port and validate that it can be
+// used for the requested family and socket type. The final option is
+// reuse_addr. This specifies whether SO_REUSEADDR should be applied before a
+// bind(2) attempt. SO_REUSEADDR means that sockets in TIME_WAIT states or other
+// bound UDP sockets would not cause an error on bind(2). This option should be
+// set if subsequent calls to bind on the returned port will also use
+// SO_REUSEADDR.
+//
+// Note: That this test will attempt to bind the ANY address for the respective
+// protocol.
+PosixErrorOr<int> PortAvailable(int port, AddressFamily family, SocketType type,
+ bool reuse_addr);
+
+// FreeAvailablePort is used to return a port that was obtained by using
+// the PortAvailable helper with port 0.
+PosixError FreeAvailablePort(int port);
+
+// SendMsg converts a buffer to an iovec and adds it to msg before sending it.
+PosixErrorOr<int> SendMsg(int sock, msghdr* msg, char buf[], int buf_size);
+
+// RecvNoData checks that no data is receivable on sock.
+void RecvNoData(int sock);
+
+// Base test fixture for tests that apply to all kinds of pairs of connected
+// sockets.
+using AllSocketPairTest = SocketPairTest;
+
+} // namespace testing
+} // namespace gvisor
+
+#endif // GVISOR_TEST_SYSCALLS_SOCKET_TEST_UTIL_H_
diff --git a/test/syscalls/linux/socket_unix.cc b/test/syscalls/linux/socket_unix.cc
new file mode 100644
index 000000000..c60a965ae
--- /dev/null
+++ b/test/syscalls/linux/socket_unix.cc
@@ -0,0 +1,1181 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_unix.h"
+
+#include <net/if.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "absl/strings/string_view.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+// This file is a generic socket test file. It must be built with another file
+// that provides the test types.
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST_P(UnixSocketPairTest, BasicFDPass) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data[20];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ auto pair =
+ ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+ ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(),
+ sent_data, sizeof(sent_data)));
+
+ char received_data[20];
+ int fd = -1;
+ ASSERT_NO_FATAL_FAILURE(RecvSingleFD(sockets->second_fd(), &fd, received_data,
+ sizeof(received_data)));
+
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+ ASSERT_NO_FATAL_FAILURE(TransferTest(fd, pair->first_fd()));
+}
+
+TEST_P(UnixSocketPairTest, BasicTwoFDPass) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data[20];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ auto pair1 =
+ ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+ auto pair2 =
+ ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+ int sent_fds[] = {pair1->second_fd(), pair2->second_fd()};
+
+ ASSERT_NO_FATAL_FAILURE(
+ SendFDs(sockets->first_fd(), sent_fds, 2, sent_data, sizeof(sent_data)));
+
+ char received_data[20];
+ int received_fds[] = {-1, -1};
+
+ ASSERT_NO_FATAL_FAILURE(RecvFDs(sockets->second_fd(), received_fds, 2,
+ received_data, sizeof(received_data)));
+
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+ ASSERT_NO_FATAL_FAILURE(TransferTest(received_fds[0], pair1->first_fd()));
+ ASSERT_NO_FATAL_FAILURE(TransferTest(received_fds[1], pair2->first_fd()));
+}
+
+TEST_P(UnixSocketPairTest, BasicThreeFDPass) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data[20];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ auto pair1 =
+ ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+ auto pair2 =
+ ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+ auto pair3 =
+ ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+ int sent_fds[] = {pair1->second_fd(), pair2->second_fd(), pair3->second_fd()};
+
+ ASSERT_NO_FATAL_FAILURE(
+ SendFDs(sockets->first_fd(), sent_fds, 3, sent_data, sizeof(sent_data)));
+
+ char received_data[20];
+ int received_fds[] = {-1, -1, -1};
+
+ ASSERT_NO_FATAL_FAILURE(RecvFDs(sockets->second_fd(), received_fds, 3,
+ received_data, sizeof(received_data)));
+
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+ ASSERT_NO_FATAL_FAILURE(TransferTest(received_fds[0], pair1->first_fd()));
+ ASSERT_NO_FATAL_FAILURE(TransferTest(received_fds[1], pair2->first_fd()));
+ ASSERT_NO_FATAL_FAILURE(TransferTest(received_fds[2], pair3->first_fd()));
+}
+
+TEST_P(UnixSocketPairTest, BadFDPass) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data[20];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ int sent_fd = -1;
+
+ struct msghdr msg = {};
+ char control[CMSG_SPACE(sizeof(sent_fd))];
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+
+ struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_len = CMSG_LEN(sizeof(sent_fd));
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ memcpy(CMSG_DATA(cmsg), &sent_fd, sizeof(sent_fd));
+
+ struct iovec iov;
+ iov.iov_base = sent_data;
+ iov.iov_len = sizeof(sent_data);
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ ASSERT_THAT(RetryEINTR(sendmsg)(sockets->first_fd(), &msg, 0),
+ SyscallFailsWithErrno(EBADF));
+}
+
+// BasicFDPassNoSpace starts off by sending a single FD just like BasicFDPass.
+// The difference is that when calling recvmsg, no space for FDs is provided,
+// only space for the cmsg header.
+TEST_P(UnixSocketPairTest, BasicFDPassNoSpace) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data[20];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ auto pair =
+ ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+ ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(),
+ sent_data, sizeof(sent_data)));
+
+ char received_data[20];
+
+ struct msghdr msg = {};
+ std::vector<char> control(CMSG_SPACE(0));
+ msg.msg_control = &control[0];
+ msg.msg_controllen = control.size();
+
+ struct iovec iov;
+ iov.iov_base = received_data;
+ iov.iov_len = sizeof(received_data);
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, 0),
+ SyscallSucceedsWithValue(sizeof(received_data)));
+
+ EXPECT_EQ(msg.msg_controllen, 0);
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+}
+
+// BasicFDPassUnalignedRecv starts off by sending a single FD just like
+// BasicFDPass. The difference is that when calling recvmsg, the length of the
+// receive data is only aligned on a 4 byte boundry instead of the normal 8.
+TEST_P(UnixSocketPairTest, BasicFDPassUnalignedRecv) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data[20];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ auto pair =
+ ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+ ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(),
+ sent_data, sizeof(sent_data)));
+
+ char received_data[20];
+ int fd = -1;
+ ASSERT_NO_FATAL_FAILURE(RecvSingleFDUnaligned(
+ sockets->second_fd(), &fd, received_data, sizeof(received_data)));
+
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+ ASSERT_NO_FATAL_FAILURE(TransferTest(fd, pair->first_fd()));
+}
+
+TEST_P(UnixSocketPairTest, ConcurrentBasicFDPass) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data[20];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ int sockfd1 = sockets->first_fd();
+ auto recv_func = [sockfd1, sent_data]() {
+ char received_data[20];
+ int fd = -1;
+ RecvSingleFD(sockfd1, &fd, received_data, sizeof(received_data));
+ ASSERT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+ char buf[20];
+ ASSERT_THAT(ReadFd(fd, buf, sizeof(buf)),
+ SyscallSucceedsWithValue(sizeof(buf)));
+ ASSERT_THAT(WriteFd(fd, buf, sizeof(buf)),
+ SyscallSucceedsWithValue(sizeof(buf)));
+ };
+
+ auto pair =
+ ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+ ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->second_fd(), pair->second_fd(),
+ sent_data, sizeof(sent_data)));
+
+ ScopedThread t(recv_func);
+
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+ ASSERT_THAT(WriteFd(pair->first_fd(), sent_data, sizeof(sent_data)),
+ SyscallSucceedsWithValue(sizeof(sent_data)));
+
+ char received_data[20];
+ ASSERT_THAT(ReadFd(pair->first_fd(), received_data, sizeof(received_data)),
+ SyscallSucceedsWithValue(sizeof(received_data)));
+
+ t.Join();
+
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+}
+
+// FDPassNoRecv checks that the control message can be safely ignored by using
+// read(2) instead of recvmsg(2).
+TEST_P(UnixSocketPairTest, FDPassNoRecv) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data[20];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ auto pair =
+ ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+ ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(),
+ sent_data, sizeof(sent_data)));
+
+ // Read while ignoring the passed FD.
+ char received_data[20];
+ ASSERT_THAT(
+ ReadFd(sockets->second_fd(), received_data, sizeof(received_data)),
+ SyscallSucceedsWithValue(sizeof(received_data)));
+
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+ // Check that the socket still works for reads and writes.
+ ASSERT_NO_FATAL_FAILURE(
+ TransferTest(sockets->first_fd(), sockets->second_fd()));
+}
+
+// FDPassInterspersed1 checks that sent control messages cannot be read before
+// their associated data has been read.
+TEST_P(UnixSocketPairTest, FDPassInterspersed1) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char written_data[20];
+ RandomizeBuffer(written_data, sizeof(written_data));
+
+ ASSERT_THAT(WriteFd(sockets->first_fd(), written_data, sizeof(written_data)),
+ SyscallSucceedsWithValue(sizeof(written_data)));
+
+ char sent_data[20];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ auto pair =
+ ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+ ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(),
+ sent_data, sizeof(sent_data)));
+
+ // Check that we don't get a control message, but do get the data.
+ char received_data[20];
+ RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data));
+ EXPECT_EQ(0, memcmp(written_data, received_data, sizeof(written_data)));
+}
+
+// FDPassInterspersed2 checks that sent control messages cannot be read after
+// their assocated data has been read while ignoring the control message by
+// using read(2) instead of recvmsg(2).
+TEST_P(UnixSocketPairTest, FDPassInterspersed2) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data[20];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ auto pair =
+ ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+ ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(),
+ sent_data, sizeof(sent_data)));
+
+ char written_data[20];
+ RandomizeBuffer(written_data, sizeof(written_data));
+ ASSERT_THAT(WriteFd(sockets->first_fd(), written_data, sizeof(written_data)),
+ SyscallSucceedsWithValue(sizeof(written_data)));
+
+ char received_data[20];
+ ASSERT_THAT(
+ ReadFd(sockets->second_fd(), received_data, sizeof(received_data)),
+ SyscallSucceedsWithValue(sizeof(received_data)));
+
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+ ASSERT_NO_FATAL_FAILURE(
+ RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data)));
+ EXPECT_EQ(0, memcmp(written_data, received_data, sizeof(written_data)));
+}
+
+TEST_P(UnixSocketPairTest, FDPassNotCoalesced) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data1[20];
+ RandomizeBuffer(sent_data1, sizeof(sent_data1));
+
+ auto pair1 =
+ ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+ ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair1->second_fd(),
+ sent_data1, sizeof(sent_data1)));
+
+ char sent_data2[20];
+ RandomizeBuffer(sent_data2, sizeof(sent_data2));
+
+ auto pair2 =
+ ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+ ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair2->second_fd(),
+ sent_data2, sizeof(sent_data2)));
+
+ char received_data1[sizeof(sent_data1) + sizeof(sent_data2)];
+ int received_fd1 = -1;
+
+ RecvSingleFD(sockets->second_fd(), &received_fd1, received_data1,
+ sizeof(received_data1), sizeof(sent_data1));
+
+ EXPECT_EQ(0, memcmp(sent_data1, received_data1, sizeof(sent_data1)));
+ TransferTest(pair1->first_fd(), pair1->second_fd());
+
+ char received_data2[sizeof(sent_data1) + sizeof(sent_data2)];
+ int received_fd2 = -1;
+
+ RecvSingleFD(sockets->second_fd(), &received_fd2, received_data2,
+ sizeof(received_data2), sizeof(sent_data2));
+
+ EXPECT_EQ(0, memcmp(sent_data2, received_data2, sizeof(sent_data2)));
+ TransferTest(pair2->first_fd(), pair2->second_fd());
+}
+
+TEST_P(UnixSocketPairTest, FDPassPeek) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data[20];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ auto pair =
+ ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+ ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(),
+ sent_data, sizeof(sent_data)));
+
+ char peek_data[20];
+ int peek_fd = -1;
+ PeekSingleFD(sockets->second_fd(), &peek_fd, peek_data, sizeof(peek_data));
+ EXPECT_EQ(0, memcmp(sent_data, peek_data, sizeof(sent_data)));
+ TransferTest(peek_fd, pair->first_fd());
+ EXPECT_THAT(close(peek_fd), SyscallSucceeds());
+
+ char received_data[20];
+ int received_fd = -1;
+ RecvSingleFD(sockets->second_fd(), &received_fd, received_data,
+ sizeof(received_data));
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+ TransferTest(received_fd, pair->first_fd());
+ EXPECT_THAT(close(received_fd), SyscallSucceeds());
+}
+
+TEST_P(UnixSocketPairTest, BasicCredPass) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data[20];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ struct ucred sent_creds;
+
+ ASSERT_THAT(sent_creds.pid = getpid(), SyscallSucceeds());
+ ASSERT_THAT(sent_creds.uid = getuid(), SyscallSucceeds());
+ ASSERT_THAT(sent_creds.gid = getgid(), SyscallSucceeds());
+
+ ASSERT_NO_FATAL_FAILURE(
+ SendCreds(sockets->first_fd(), sent_creds, sent_data, sizeof(sent_data)));
+
+ SetSoPassCred(sockets->second_fd());
+
+ char received_data[20];
+ struct ucred received_creds;
+ ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds,
+ received_data, sizeof(received_data)));
+
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+ EXPECT_EQ(sent_creds.pid, received_creds.pid);
+ EXPECT_EQ(sent_creds.uid, received_creds.uid);
+ EXPECT_EQ(sent_creds.gid, received_creds.gid);
+}
+
+TEST_P(UnixSocketPairTest, SendNullCredsBeforeSoPassCredRecvEnd) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data[20];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ ASSERT_NO_FATAL_FAILURE(
+ SendNullCmsg(sockets->first_fd(), sent_data, sizeof(sent_data)));
+
+ SetSoPassCred(sockets->second_fd());
+
+ char received_data[20];
+ struct ucred received_creds;
+ ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds,
+ received_data, sizeof(received_data)));
+
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+ struct ucred want_creds {
+ 0, 65534, 65534
+ };
+
+ EXPECT_EQ(want_creds.pid, received_creds.pid);
+ EXPECT_EQ(want_creds.uid, received_creds.uid);
+ EXPECT_EQ(want_creds.gid, received_creds.gid);
+}
+
+TEST_P(UnixSocketPairTest, SendNullCredsAfterSoPassCredRecvEnd) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data[20];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ SetSoPassCred(sockets->second_fd());
+
+ ASSERT_NO_FATAL_FAILURE(
+ SendNullCmsg(sockets->first_fd(), sent_data, sizeof(sent_data)));
+
+ char received_data[20];
+ struct ucred received_creds;
+ ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds,
+ received_data, sizeof(received_data)));
+
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+ struct ucred want_creds;
+ ASSERT_THAT(want_creds.pid = getpid(), SyscallSucceeds());
+ ASSERT_THAT(want_creds.uid = getuid(), SyscallSucceeds());
+ ASSERT_THAT(want_creds.gid = getgid(), SyscallSucceeds());
+
+ EXPECT_EQ(want_creds.pid, received_creds.pid);
+ EXPECT_EQ(want_creds.uid, received_creds.uid);
+ EXPECT_EQ(want_creds.gid, received_creds.gid);
+}
+
+TEST_P(UnixSocketPairTest, SendNullCredsBeforeSoPassCredSendEnd) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data[20];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ ASSERT_NO_FATAL_FAILURE(
+ SendNullCmsg(sockets->first_fd(), sent_data, sizeof(sent_data)));
+
+ SetSoPassCred(sockets->first_fd());
+
+ char received_data[20];
+ ASSERT_NO_FATAL_FAILURE(
+ RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data)));
+
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+}
+
+TEST_P(UnixSocketPairTest, SendNullCredsAfterSoPassCredSendEnd) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data[20];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ SetSoPassCred(sockets->first_fd());
+
+ ASSERT_NO_FATAL_FAILURE(
+ SendNullCmsg(sockets->first_fd(), sent_data, sizeof(sent_data)));
+
+ char received_data[20];
+ ASSERT_NO_FATAL_FAILURE(
+ RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data)));
+
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+}
+
+TEST_P(UnixSocketPairTest, SendNullCredsBeforeSoPassCredRecvEndAfterSendEnd) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data[20];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ SetSoPassCred(sockets->first_fd());
+
+ ASSERT_NO_FATAL_FAILURE(
+ SendNullCmsg(sockets->first_fd(), sent_data, sizeof(sent_data)));
+
+ SetSoPassCred(sockets->second_fd());
+
+ char received_data[20];
+ struct ucred received_creds;
+ ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds,
+ received_data, sizeof(received_data)));
+
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+ struct ucred want_creds;
+ ASSERT_THAT(want_creds.pid = getpid(), SyscallSucceeds());
+ ASSERT_THAT(want_creds.uid = getuid(), SyscallSucceeds());
+ ASSERT_THAT(want_creds.gid = getgid(), SyscallSucceeds());
+
+ EXPECT_EQ(want_creds.pid, received_creds.pid);
+ EXPECT_EQ(want_creds.uid, received_creds.uid);
+ EXPECT_EQ(want_creds.gid, received_creds.gid);
+}
+
+TEST_P(UnixSocketPairTest, WriteBeforeSoPassCredRecvEnd) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data[20];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data, sizeof(sent_data)),
+ SyscallSucceedsWithValue(sizeof(sent_data)));
+
+ SetSoPassCred(sockets->second_fd());
+
+ char received_data[20];
+
+ struct ucred received_creds;
+ ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds,
+ received_data, sizeof(received_data)));
+
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+ struct ucred want_creds {
+ 0, 65534, 65534
+ };
+
+ EXPECT_EQ(want_creds.pid, received_creds.pid);
+ EXPECT_EQ(want_creds.uid, received_creds.uid);
+ EXPECT_EQ(want_creds.gid, received_creds.gid);
+}
+
+TEST_P(UnixSocketPairTest, WriteAfterSoPassCredRecvEnd) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ SetSoPassCred(sockets->second_fd());
+
+ char sent_data[20];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+ ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data, sizeof(sent_data)),
+ SyscallSucceedsWithValue(sizeof(sent_data)));
+
+ char received_data[20];
+
+ struct ucred received_creds;
+ ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds,
+ received_data, sizeof(received_data)));
+
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+ struct ucred want_creds;
+ ASSERT_THAT(want_creds.pid = getpid(), SyscallSucceeds());
+ ASSERT_THAT(want_creds.uid = getuid(), SyscallSucceeds());
+ ASSERT_THAT(want_creds.gid = getgid(), SyscallSucceeds());
+
+ EXPECT_EQ(want_creds.pid, received_creds.pid);
+ EXPECT_EQ(want_creds.uid, received_creds.uid);
+ EXPECT_EQ(want_creds.gid, received_creds.gid);
+}
+
+TEST_P(UnixSocketPairTest, WriteBeforeSoPassCredSendEnd) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data[20];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data, sizeof(sent_data)),
+ SyscallSucceedsWithValue(sizeof(sent_data)));
+
+ SetSoPassCred(sockets->first_fd());
+
+ char received_data[20];
+ ASSERT_NO_FATAL_FAILURE(
+ RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data)));
+
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+}
+
+TEST_P(UnixSocketPairTest, WriteAfterSoPassCredSendEnd) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ SetSoPassCred(sockets->first_fd());
+
+ char sent_data[20];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data, sizeof(sent_data)),
+ SyscallSucceedsWithValue(sizeof(sent_data)));
+
+ char received_data[20];
+ ASSERT_NO_FATAL_FAILURE(
+ RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data)));
+
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+}
+
+TEST_P(UnixSocketPairTest, WriteBeforeSoPassCredRecvEndAfterSendEnd) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data[20];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ SetSoPassCred(sockets->first_fd());
+
+ ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data, sizeof(sent_data)),
+ SyscallSucceedsWithValue(sizeof(sent_data)));
+
+ SetSoPassCred(sockets->second_fd());
+
+ char received_data[20];
+
+ struct ucred received_creds;
+ ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds,
+ received_data, sizeof(received_data)));
+
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+ struct ucred want_creds;
+ ASSERT_THAT(want_creds.pid = getpid(), SyscallSucceeds());
+ ASSERT_THAT(want_creds.uid = getuid(), SyscallSucceeds());
+ ASSERT_THAT(want_creds.gid = getgid(), SyscallSucceeds());
+
+ EXPECT_EQ(want_creds.pid, received_creds.pid);
+ EXPECT_EQ(want_creds.uid, received_creds.uid);
+ EXPECT_EQ(want_creds.gid, received_creds.gid);
+}
+
+TEST_P(UnixSocketPairTest, SoPassCred) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ int opt;
+ socklen_t optLen = sizeof(opt);
+ EXPECT_THAT(
+ getsockopt(sockets->first_fd(), SOL_SOCKET, SO_PASSCRED, &opt, &optLen),
+ SyscallSucceeds());
+ EXPECT_FALSE(opt);
+
+ optLen = sizeof(opt);
+ EXPECT_THAT(
+ getsockopt(sockets->second_fd(), SOL_SOCKET, SO_PASSCRED, &opt, &optLen),
+ SyscallSucceeds());
+ EXPECT_FALSE(opt);
+
+ SetSoPassCred(sockets->first_fd());
+
+ optLen = sizeof(opt);
+ EXPECT_THAT(
+ getsockopt(sockets->first_fd(), SOL_SOCKET, SO_PASSCRED, &opt, &optLen),
+ SyscallSucceeds());
+ EXPECT_TRUE(opt);
+
+ optLen = sizeof(opt);
+ EXPECT_THAT(
+ getsockopt(sockets->second_fd(), SOL_SOCKET, SO_PASSCRED, &opt, &optLen),
+ SyscallSucceeds());
+ EXPECT_FALSE(opt);
+
+ int zero = 0;
+ EXPECT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_PASSCRED, &zero,
+ sizeof(zero)),
+ SyscallSucceeds());
+
+ optLen = sizeof(opt);
+ EXPECT_THAT(
+ getsockopt(sockets->first_fd(), SOL_SOCKET, SO_PASSCRED, &opt, &optLen),
+ SyscallSucceeds());
+ EXPECT_FALSE(opt);
+
+ optLen = sizeof(opt);
+ EXPECT_THAT(
+ getsockopt(sockets->second_fd(), SOL_SOCKET, SO_PASSCRED, &opt, &optLen),
+ SyscallSucceeds());
+ EXPECT_FALSE(opt);
+}
+
+TEST_P(UnixSocketPairTest, NoDataCredPass) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data[20];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ struct msghdr msg = {};
+
+ struct iovec iov;
+ iov.iov_base = sent_data;
+ iov.iov_len = sizeof(sent_data);
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ char control[CMSG_SPACE(0)];
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+
+ struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_CREDENTIALS;
+ cmsg->cmsg_len = CMSG_LEN(0);
+
+ ASSERT_THAT(RetryEINTR(sendmsg)(sockets->first_fd(), &msg, 0),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+TEST_P(UnixSocketPairTest, NoPassCred) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data[20];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ struct ucred sent_creds;
+
+ ASSERT_THAT(sent_creds.pid = getpid(), SyscallSucceeds());
+ ASSERT_THAT(sent_creds.uid = getuid(), SyscallSucceeds());
+ ASSERT_THAT(sent_creds.gid = getgid(), SyscallSucceeds());
+
+ ASSERT_NO_FATAL_FAILURE(
+ SendCreds(sockets->first_fd(), sent_creds, sent_data, sizeof(sent_data)));
+
+ char received_data[20];
+
+ ASSERT_NO_FATAL_FAILURE(
+ RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data)));
+
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+}
+
+TEST_P(UnixSocketPairTest, CredAndFDPass) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data[20];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ struct ucred sent_creds;
+
+ ASSERT_THAT(sent_creds.pid = getpid(), SyscallSucceeds());
+ ASSERT_THAT(sent_creds.uid = getuid(), SyscallSucceeds());
+ ASSERT_THAT(sent_creds.gid = getgid(), SyscallSucceeds());
+
+ auto pair =
+ ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+ ASSERT_NO_FATAL_FAILURE(SendCredsAndFD(sockets->first_fd(), sent_creds,
+ pair->second_fd(), sent_data,
+ sizeof(sent_data)));
+
+ SetSoPassCred(sockets->second_fd());
+
+ char received_data[20];
+ struct ucred received_creds;
+ int fd = -1;
+ ASSERT_NO_FATAL_FAILURE(RecvCredsAndFD(sockets->second_fd(), &received_creds,
+ &fd, received_data,
+ sizeof(received_data)));
+
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+ EXPECT_EQ(sent_creds.pid, received_creds.pid);
+ EXPECT_EQ(sent_creds.uid, received_creds.uid);
+ EXPECT_EQ(sent_creds.gid, received_creds.gid);
+
+ ASSERT_NO_FATAL_FAILURE(TransferTest(fd, pair->first_fd()));
+}
+
+TEST_P(UnixSocketPairTest, FDPassBeforeSoPassCred) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data[20];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ auto pair =
+ ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+ ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(),
+ sent_data, sizeof(sent_data)));
+
+ SetSoPassCred(sockets->second_fd());
+
+ char received_data[20];
+ struct ucred received_creds;
+ int fd = -1;
+ ASSERT_NO_FATAL_FAILURE(RecvCredsAndFD(sockets->second_fd(), &received_creds,
+ &fd, received_data,
+ sizeof(received_data)));
+
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+ struct ucred want_creds {
+ 0, 65534, 65534
+ };
+
+ EXPECT_EQ(want_creds.pid, received_creds.pid);
+ EXPECT_EQ(want_creds.uid, received_creds.uid);
+ EXPECT_EQ(want_creds.gid, received_creds.gid);
+
+ ASSERT_NO_FATAL_FAILURE(TransferTest(fd, pair->first_fd()));
+}
+
+TEST_P(UnixSocketPairTest, FDPassAfterSoPassCred) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data[20];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ auto pair =
+ ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+ SetSoPassCred(sockets->second_fd());
+
+ ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(),
+ sent_data, sizeof(sent_data)));
+
+ char received_data[20];
+ struct ucred received_creds;
+ int fd = -1;
+ ASSERT_NO_FATAL_FAILURE(RecvCredsAndFD(sockets->second_fd(), &received_creds,
+ &fd, received_data,
+ sizeof(received_data)));
+
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+ struct ucred want_creds;
+ ASSERT_THAT(want_creds.pid = getpid(), SyscallSucceeds());
+ ASSERT_THAT(want_creds.uid = getuid(), SyscallSucceeds());
+ ASSERT_THAT(want_creds.gid = getgid(), SyscallSucceeds());
+
+ EXPECT_EQ(want_creds.pid, received_creds.pid);
+ EXPECT_EQ(want_creds.uid, received_creds.uid);
+ EXPECT_EQ(want_creds.gid, received_creds.gid);
+
+ ASSERT_NO_FATAL_FAILURE(TransferTest(fd, pair->first_fd()));
+}
+
+TEST_P(UnixSocketPairTest, CloexecDroppedWhenFDPassed) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data[20];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ auto pair = ASSERT_NO_ERRNO_AND_VALUE(
+ UnixDomainSocketPair(SOCK_SEQPACKET | SOCK_CLOEXEC).Create());
+
+ ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(),
+ sent_data, sizeof(sent_data)));
+
+ char received_data[20];
+ int fd = -1;
+ ASSERT_NO_FATAL_FAILURE(RecvSingleFD(sockets->second_fd(), &fd, received_data,
+ sizeof(received_data)));
+
+ EXPECT_THAT(fcntl(fd, F_GETFD), SyscallSucceedsWithValue(0));
+}
+
+TEST_P(UnixSocketPairTest, CloexecRecvFDPass) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data[20];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ auto pair =
+ ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+ ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(),
+ sent_data, sizeof(sent_data)));
+
+ struct msghdr msg = {};
+ char control[CMSG_SPACE(sizeof(int))];
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+
+ struct iovec iov;
+ char received_data[20];
+ iov.iov_base = received_data;
+ iov.iov_len = sizeof(received_data);
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, MSG_CMSG_CLOEXEC),
+ SyscallSucceedsWithValue(sizeof(received_data)));
+ struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+ ASSERT_NE(cmsg, nullptr);
+ ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(int)));
+ ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET);
+ ASSERT_EQ(cmsg->cmsg_type, SCM_RIGHTS);
+
+ int fd = -1;
+ memcpy(&fd, CMSG_DATA(cmsg), sizeof(int));
+
+ EXPECT_THAT(fcntl(fd, F_GETFD), SyscallSucceedsWithValue(FD_CLOEXEC));
+}
+
+TEST_P(UnixSocketPairTest, FDPassAfterSoPassCredWithoutCredSpace) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data[20];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ auto pair =
+ ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+ SetSoPassCred(sockets->second_fd());
+
+ ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(),
+ sent_data, sizeof(sent_data)));
+
+ struct msghdr msg = {};
+ char control[CMSG_LEN(0)];
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+
+ char received_data[20];
+ struct iovec iov;
+ iov.iov_base = received_data;
+ iov.iov_len = sizeof(received_data);
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, 0),
+ SyscallSucceedsWithValue(sizeof(received_data)));
+
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+ EXPECT_EQ(msg.msg_controllen, sizeof(control));
+
+ struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+ ASSERT_NE(cmsg, nullptr);
+ EXPECT_EQ(cmsg->cmsg_len, sizeof(control));
+ EXPECT_EQ(cmsg->cmsg_level, SOL_SOCKET);
+ EXPECT_EQ(cmsg->cmsg_type, SCM_CREDENTIALS);
+}
+
+// This test will validate that MSG_CTRUNC as an input flag to recvmsg will
+// not appear as an output flag on the control message when truncation doesn't
+// happen.
+TEST_P(UnixSocketPairTest, MsgCtruncInputIsNoop) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data[20];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ auto pair =
+ ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+ ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(),
+ sent_data, sizeof(sent_data)));
+
+ struct msghdr msg = {};
+ char control[CMSG_SPACE(sizeof(int)) /* we're passing a single fd */];
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+
+ struct iovec iov;
+ char received_data[20];
+ iov.iov_base = received_data;
+ iov.iov_len = sizeof(received_data);
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, MSG_CTRUNC),
+ SyscallSucceedsWithValue(sizeof(received_data)));
+ struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+ ASSERT_NE(cmsg, nullptr);
+ ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(int)));
+ ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET);
+ ASSERT_EQ(cmsg->cmsg_type, SCM_RIGHTS);
+
+ // Now we should verify that MSG_CTRUNC wasn't set as an output flag.
+ EXPECT_EQ(msg.msg_flags & MSG_CTRUNC, 0);
+}
+
+TEST_P(UnixSocketPairTest, FDPassAfterSoPassCredWithoutCredHeaderSpace) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data[20];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ auto pair =
+ ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+ SetSoPassCred(sockets->second_fd());
+
+ ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(),
+ sent_data, sizeof(sent_data)));
+
+ struct msghdr msg = {};
+ char control[CMSG_LEN(0) / 2];
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+
+ char received_data[20];
+ struct iovec iov;
+ iov.iov_base = received_data;
+ iov.iov_len = sizeof(received_data);
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, 0),
+ SyscallSucceedsWithValue(sizeof(received_data)));
+
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+ EXPECT_EQ(msg.msg_controllen, 0);
+}
+
+TEST_P(UnixSocketPairTest, InvalidGetSockOpt) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ int opt;
+ socklen_t optlen = sizeof(opt);
+ EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, -1, &opt, &optlen),
+ SyscallFailsWithErrno(ENOPROTOOPT));
+}
+
+TEST_P(UnixSocketPairTest, BindToBadName) {
+ auto pair =
+ ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+ constexpr char kBadName[] = "/some/path/that/does/not/exist";
+ sockaddr_un sockaddr;
+ sockaddr.sun_family = AF_LOCAL;
+ memcpy(sockaddr.sun_path, kBadName, sizeof(kBadName));
+
+ EXPECT_THAT(
+ bind(pair->first_fd(), reinterpret_cast<struct sockaddr*>(&sockaddr),
+ sizeof(sockaddr)),
+ SyscallFailsWithErrno(ENOENT));
+}
+
+TEST_P(UnixSocketPairTest, RecvmmsgTimeoutAfterRecv) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ char sent_data[10];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ char received_data[sizeof(sent_data) * 2];
+ std::vector<struct mmsghdr> msgs(2);
+ std::vector<struct iovec> iovs(msgs.size());
+ const int chunk_size = sizeof(received_data) / msgs.size();
+ for (size_t i = 0; i < msgs.size(); i++) {
+ iovs[i].iov_len = chunk_size;
+ iovs[i].iov_base = &received_data[i * chunk_size];
+ msgs[i].msg_hdr.msg_iov = &iovs[i];
+ msgs[i].msg_hdr.msg_iovlen = 1;
+ }
+
+ ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data, sizeof(sent_data)),
+ SyscallSucceedsWithValue(sizeof(sent_data)));
+
+ struct timespec timeout = {0, 1};
+ ASSERT_THAT(RetryEINTR(recvmmsg)(sockets->second_fd(), &msgs[0], msgs.size(),
+ 0, &timeout),
+ SyscallSucceedsWithValue(1));
+
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+ EXPECT_EQ(chunk_size, msgs[0].msg_len);
+}
+
+TEST_P(UnixSocketPairTest, TIOCINQSucceeds) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ int size = -1;
+ EXPECT_THAT(ioctl(sockets->first_fd(), TIOCINQ, &size), SyscallSucceeds());
+ EXPECT_EQ(size, 0);
+
+ const char some_data[] = "dangerzone";
+ ASSERT_THAT(
+ RetryEINTR(send)(sockets->second_fd(), &some_data, sizeof(some_data), 0),
+ SyscallSucceeds());
+ EXPECT_THAT(ioctl(sockets->first_fd(), TIOCINQ, &size), SyscallSucceeds());
+ EXPECT_EQ(size, sizeof(some_data));
+
+ // Linux only reports the first message's size, which is wrong. We test for
+ // the behavior described in the man page.
+ SKIP_IF(!IsRunningOnGvisor());
+
+ ASSERT_THAT(
+ RetryEINTR(send)(sockets->second_fd(), &some_data, sizeof(some_data), 0),
+ SyscallSucceeds());
+ EXPECT_THAT(ioctl(sockets->first_fd(), TIOCINQ, &size), SyscallSucceeds());
+ EXPECT_EQ(size, sizeof(some_data) * 2);
+}
+
+TEST_P(UnixSocketPairTest, TIOCOUTQSucceeds) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ int size = -1;
+ EXPECT_THAT(ioctl(sockets->second_fd(), TIOCOUTQ, &size), SyscallSucceeds());
+ EXPECT_EQ(size, 0);
+
+ // Linux reports bogus numbers which are related to its internal allocations.
+ // We test for the behavior described in the man page.
+ SKIP_IF(!IsRunningOnGvisor());
+
+ const char some_data[] = "dangerzone";
+ ASSERT_THAT(
+ RetryEINTR(send)(sockets->second_fd(), &some_data, sizeof(some_data), 0),
+ SyscallSucceeds());
+ EXPECT_THAT(ioctl(sockets->second_fd(), TIOCOUTQ, &size), SyscallSucceeds());
+ EXPECT_EQ(size, sizeof(some_data));
+
+ ASSERT_THAT(
+ RetryEINTR(send)(sockets->second_fd(), &some_data, sizeof(some_data), 0),
+ SyscallSucceeds());
+ EXPECT_THAT(ioctl(sockets->second_fd(), TIOCOUTQ, &size), SyscallSucceeds());
+ EXPECT_EQ(size, sizeof(some_data) * 2);
+}
+
+TEST_P(UnixSocketPairTest, NetdeviceIoctlsSucceed) {
+ FileDescriptor sock =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_UNIX, SOCK_DGRAM, 0));
+
+ // Prepare the request.
+ struct ifreq ifr;
+ snprintf(ifr.ifr_name, IFNAMSIZ, "lo");
+
+ // Check that the ioctl either succeeds or fails with ENODEV.
+ int err = ioctl(sock.get(), SIOCGIFINDEX, &ifr);
+ if (err < 0) {
+ ASSERT_EQ(errno, ENODEV);
+ }
+}
+
+TEST_P(UnixSocketPairTest, SocketShutdown) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ char buf[20];
+ const std::string data = "abc";
+ ASSERT_THAT(WriteFd(sockets->first_fd(), data.c_str(), 3),
+ SyscallSucceedsWithValue(3));
+ ASSERT_THAT(shutdown(sockets->first_fd(), SHUT_RDWR), SyscallSucceeds());
+ ASSERT_THAT(shutdown(sockets->second_fd(), SHUT_RDWR), SyscallSucceeds());
+
+ // Shutting down a socket does not clear the buffer.
+ ASSERT_THAT(ReadFd(sockets->second_fd(), buf, 3),
+ SyscallSucceedsWithValue(3));
+ EXPECT_EQ(data, absl::string_view(buf, 3));
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix.h b/test/syscalls/linux/socket_unix.h
new file mode 100644
index 000000000..d2a16afb2
--- /dev/null
+++ b/test/syscalls/linux/socket_unix.h
@@ -0,0 +1,29 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_H_
+#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_H_
+
+#include "test/syscalls/linux/socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Test fixture for tests that apply to pairs of connected unix sockets.
+using UnixSocketPairTest = SocketPairTest;
+
+} // namespace testing
+} // namespace gvisor
+
+#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_H_
diff --git a/test/syscalls/linux/socket_unix_abstract.cc b/test/syscalls/linux/socket_unix_abstract.cc
new file mode 100644
index 000000000..0878f63ff
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_abstract.cc
@@ -0,0 +1,38 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/socket_unix.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+ return VecCat<SocketPairKind>(ApplyVec<SocketPairKind>(
+ AbstractBoundUnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET},
+ List<int>{0, SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC})));
+}
+
+INSTANTIATE_TEST_CASE_P(
+ AllUnixDomainSockets, UnixSocketPairTest,
+ ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_abstract_nonblock.cc b/test/syscalls/linux/socket_unix_abstract_nonblock.cc
new file mode 100644
index 000000000..93fb33832
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_abstract_nonblock.cc
@@ -0,0 +1,38 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "test/syscalls/linux/socket_non_blocking.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+ return VecCat<SocketPairKind>(ApplyVec<SocketPairKind>(
+ AbstractBoundUnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET},
+ List<int>{SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC})));
+}
+
+INSTANTIATE_TEST_CASE_P(
+ AllUnixDomainSockets, NonBlockingSocketPairTest,
+ ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_dgram.cc b/test/syscalls/linux/socket_unix_dgram.cc
new file mode 100644
index 000000000..c17d3990f
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_dgram.cc
@@ -0,0 +1,45 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_unix_dgram.h"
+
+#include <stdio.h>
+#include <sys/un.h>
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST_P(DgramUnixSocketPairTest, WriteOneSideClosed) {
+ // FIXME: gVisor datagram sockets return EPIPE instead of
+ // ECONNREFUSED.
+ SKIP_IF(IsRunningOnGvisor());
+
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds());
+ constexpr char kStr[] = "abc";
+ ASSERT_THAT(write(sockets->second_fd(), kStr, 3),
+ SyscallFailsWithErrno(ECONNREFUSED));
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_dgram.h b/test/syscalls/linux/socket_unix_dgram.h
new file mode 100644
index 000000000..722a3d8e6
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_dgram.h
@@ -0,0 +1,29 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_DGRAM_H_
+#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_DGRAM_H_
+
+#include "test/syscalls/linux/socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Test fixture for tests that apply to pairs of connected dgram unix sockets.
+using DgramUnixSocketPairTest = SocketPairTest;
+
+} // namespace testing
+} // namespace gvisor
+
+#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_DGRAM_H_
diff --git a/test/syscalls/linux/socket_unix_dgram_local.cc b/test/syscalls/linux/socket_unix_dgram_local.cc
new file mode 100644
index 000000000..b2fa72b5e
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_dgram_local.cc
@@ -0,0 +1,59 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "test/syscalls/linux/socket_non_stream.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/socket_unix_dgram.h"
+#include "test/syscalls/linux/socket_unix_non_stream.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+ return VecCat<SocketPairKind>(VecCat<SocketPairKind>(
+ ApplyVec<SocketPairKind>(
+ UnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_DGRAM},
+ List<int>{0, SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC})),
+ ApplyVec<SocketPairKind>(
+ FilesystemBoundUnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_DGRAM},
+ List<int>{0, SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC})),
+ ApplyVec<SocketPairKind>(
+ AbstractBoundUnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_DGRAM},
+ List<int>{0, SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC}))));
+}
+
+INSTANTIATE_TEST_CASE_P(
+ AllUnixDomainSockets, DgramUnixSocketPairTest,
+ ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+INSTANTIATE_TEST_CASE_P(
+ AllUnixDomainSockets, UnixNonStreamSocketPairTest,
+ ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+INSTANTIATE_TEST_CASE_P(
+ AllUnixDomainSockets, NonStreamSocketPairTest,
+ ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_dgram_non_blocking.cc b/test/syscalls/linux/socket_unix_dgram_non_blocking.cc
new file mode 100644
index 000000000..9152c229c
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_dgram_non_blocking.cc
@@ -0,0 +1,68 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdio.h>
+#include <sys/un.h>
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Test fixture for tests that apply to pairs of connected non-blocking dgram
+// unix sockets.
+using NonBlockingDgramUnixSocketPairTest = SocketPairTest;
+
+TEST_P(NonBlockingDgramUnixSocketPairTest, ReadOneSideClosed) {
+ if (IsRunningOnGvisor()) {
+ // FIXME: gVisor datagram sockets return 0 instead of
+ // EAGAIN.
+ return;
+ }
+
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds());
+ char data[10] = {};
+ ASSERT_THAT(read(sockets->second_fd(), data, sizeof(data)),
+ SyscallFailsWithErrno(EAGAIN));
+}
+
+INSTANTIATE_TEST_CASE_P(
+ AllUnixDomainSockets, NonBlockingDgramUnixSocketPairTest,
+ ::testing::ValuesIn(IncludeReversals(VecCat<SocketPairKind>(
+ ApplyVec<SocketPairKind>(
+ UnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_DGRAM},
+ List<int>{SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC})),
+ ApplyVec<SocketPairKind>(
+ FilesystemBoundUnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_DGRAM},
+ List<int>{SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC})),
+ ApplyVec<SocketPairKind>(
+ AbstractBoundUnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_DGRAM},
+ List<int>{SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC}))))));
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_domain.cc b/test/syscalls/linux/socket_unix_domain.cc
new file mode 100644
index 000000000..f8f0d01eb
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_domain.cc
@@ -0,0 +1,38 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "test/syscalls/linux/socket_generic.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+ return VecCat<SocketPairKind>(ApplyVec<SocketPairKind>(
+ UnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET},
+ List<int>{0, SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC})));
+}
+
+INSTANTIATE_TEST_CASE_P(
+ AllUnixDomainSockets, AllSocketPairTest,
+ ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_filesystem.cc b/test/syscalls/linux/socket_unix_filesystem.cc
new file mode 100644
index 000000000..be873edcb
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_filesystem.cc
@@ -0,0 +1,38 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/socket_unix.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+ return VecCat<SocketPairKind>(ApplyVec<SocketPairKind>(
+ FilesystemBoundUnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET},
+ List<int>{0, SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC})));
+}
+
+INSTANTIATE_TEST_CASE_P(
+ AllUnixDomainSockets, UnixSocketPairTest,
+ ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_filesystem_nonblock.cc b/test/syscalls/linux/socket_unix_filesystem_nonblock.cc
new file mode 100644
index 000000000..63e85ac11
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_filesystem_nonblock.cc
@@ -0,0 +1,38 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "test/syscalls/linux/socket_non_blocking.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+ return VecCat<SocketPairKind>(ApplyVec<SocketPairKind>(
+ FilesystemBoundUnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET},
+ List<int>{SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC})));
+}
+
+INSTANTIATE_TEST_CASE_P(
+ AllUnixDomainSockets, NonBlockingSocketPairTest,
+ ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_non_stream.cc b/test/syscalls/linux/socket_unix_non_stream.cc
new file mode 100644
index 000000000..620397746
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_non_stream.cc
@@ -0,0 +1,229 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_unix_non_stream.h"
+
+#include <stdio.h>
+#include <sys/mman.h>
+#include <sys/un.h>
+
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/memory_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+TEST_P(UnixNonStreamSocketPairTest, RecvMsgTooLarge) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ int rcvbuf;
+ socklen_t length = sizeof(rcvbuf);
+ ASSERT_THAT(
+ getsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVBUF, &rcvbuf, &length),
+ SyscallSucceeds());
+
+ // Make the call larger than the receive buffer.
+ const int recv_size = 3 * rcvbuf;
+
+ // Write a message that does fit in the receive buffer.
+ const int write_size = rcvbuf - kPageSize;
+
+ std::vector<char> write_buf(write_size, 'a');
+ const int ret = RetryEINTR(write)(sockets->second_fd(), write_buf.data(),
+ write_buf.size());
+ if (ret < 0 && errno == ENOBUFS) {
+ // NOTE: Linux may stall the write for a long time and
+ // ultimately return ENOBUFS. Allow this error, since a retry will likely
+ // result in the same error.
+ return;
+ }
+ ASSERT_THAT(ret, SyscallSucceeds());
+
+ std::vector<char> recv_buf(recv_size);
+
+ ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(sockets->first_fd(), recv_buf.data(),
+ recv_buf.size(), write_size));
+
+ recv_buf.resize(write_size);
+ EXPECT_EQ(recv_buf, write_buf);
+}
+
+// Create a region of anonymous memory of size 'size', which is fragmented in
+// FileMem.
+//
+// ptr contains the start address of the region. The returned vector contains
+// all of the mappings to be unmapped when done.
+PosixErrorOr<std::vector<Mapping>> CreateFragmentedRegion(const int size,
+ void** ptr) {
+ Mapping region;
+ ASSIGN_OR_RETURN_ERRNO(region, Mmap(nullptr, size, PROT_NONE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0));
+
+ *ptr = region.ptr();
+
+ // Don't save hundreds of times for all of these mmaps.
+ DisableSave ds;
+
+ std::vector<Mapping> pages;
+
+ // Map and commit a single page at a time, mapping and committing an unrelated
+ // page between each call to force FileMem fragmentation.
+ for (uintptr_t addr = region.addr(); addr < region.endaddr();
+ addr += kPageSize) {
+ Mapping page;
+ ASSIGN_OR_RETURN_ERRNO(
+ page,
+ Mmap(reinterpret_cast<void*>(addr), kPageSize, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0));
+ *reinterpret_cast<volatile char*>(page.ptr()) = 42;
+
+ pages.emplace_back(std::move(page));
+
+ // Unrelated page elsewhere.
+ ASSIGN_OR_RETURN_ERRNO(page,
+ Mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0));
+ *reinterpret_cast<volatile char*>(page.ptr()) = 42;
+
+ pages.emplace_back(std::move(page));
+ }
+
+ // The mappings above have taken ownership of the region.
+ region.release();
+
+ return pages;
+}
+
+// A contiguous iov that is heavily fragmented in FileMem can still be sent
+// successfully.
+TEST_P(UnixNonStreamSocketPairTest, FragmentedSendMsg) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ const int buffer_size = UIO_MAXIOV * kPageSize;
+ // Extra page for message header overhead.
+ const int sndbuf = buffer_size + kPageSize;
+ // N.B. setsockopt(SO_SNDBUF) doubles the passed value.
+ const int set_sndbuf = sndbuf / 2;
+
+ EXPECT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDBUF,
+ &set_sndbuf, sizeof(set_sndbuf)),
+ SyscallSucceeds());
+
+ int actual_sndbuf = 0;
+ socklen_t length = sizeof(actual_sndbuf);
+ ASSERT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDBUF,
+ &actual_sndbuf, &length),
+ SyscallSucceeds());
+
+ if (actual_sndbuf != sndbuf) {
+ // Unable to get the sndbuf we want.
+ //
+ // N.B. At minimum, the socketpair gofer should provide a socket that is
+ // already the correct size.
+ //
+ // TODO: When internal UDS support SO_SNDBUF, we can assert that
+ // we always get the right SO_SNDBUF on gVisor.
+ LOG(INFO) << "SO_SNDBUF = " << actual_sndbuf << ", want " << sndbuf
+ << ". Skipping test";
+ return;
+ }
+
+ // Create a contiguous region of memory of 2*UIO_MAXIOV*PAGE_SIZE. We'll call
+ // sendmsg with a single iov, but the goal is to get the sentry to split this
+ // into > UIO_MAXIOV iovs when calling the kernel.
+ void* ptr;
+ std::vector<Mapping> pages =
+ ASSERT_NO_ERRNO_AND_VALUE(CreateFragmentedRegion(buffer_size, &ptr));
+
+ struct iovec iov = {};
+ iov.iov_base = ptr;
+ iov.iov_len = buffer_size;
+
+ struct msghdr msg = {};
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ // NOTE: Linux has poor behavior in the presence of
+ // physical memory fragmentation. As a result, this may stall for a long time
+ // and ultimately return ENOBUFS. Allow this error, since it means that we
+ // made it to the host kernel and started the sendmsg.
+ EXPECT_THAT(RetryEINTR(sendmsg)(sockets->first_fd(), &msg, 0),
+ AnyOf(SyscallSucceedsWithValue(buffer_size),
+ SyscallFailsWithErrno(ENOBUFS)));
+}
+
+// A contiguous iov that is heavily fragmented in FileMem can still be received
+// into successfully.
+TEST_P(UnixNonStreamSocketPairTest, FragmentedRecvMsg) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ const int buffer_size = UIO_MAXIOV * kPageSize;
+ // Extra page for message header overhead.
+ const int sndbuf = buffer_size + kPageSize;
+ // N.B. setsockopt(SO_SNDBUF) doubles the passed value.
+ const int set_sndbuf = sndbuf / 2;
+
+ EXPECT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDBUF,
+ &set_sndbuf, sizeof(set_sndbuf)),
+ SyscallSucceeds());
+
+ int actual_sndbuf = 0;
+ socklen_t length = sizeof(actual_sndbuf);
+ ASSERT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDBUF,
+ &actual_sndbuf, &length),
+ SyscallSucceeds());
+
+ if (actual_sndbuf != sndbuf) {
+ // Unable to get the sndbuf we want.
+ //
+ // N.B. At minimum, the socketpair gofer should provide a socket that is
+ // already the correct size.
+ //
+ // TODO: When internal UDS support SO_SNDBUF, we can assert that
+ // we always get the right SO_SNDBUF on gVisor.
+ LOG(INFO) << "SO_SNDBUF = " << actual_sndbuf << ", want " << sndbuf
+ << ". Skipping test";
+ return;
+ }
+
+ std::vector<char> write_buf(buffer_size, 'a');
+ const int ret = RetryEINTR(write)(sockets->first_fd(), write_buf.data(),
+ write_buf.size());
+ if (ret < 0 && errno == ENOBUFS) {
+ // NOTE: Linux may stall the write for a long time and
+ // ultimately return ENOBUFS. Allow this error, since a retry will likely
+ // result in the same error.
+ return;
+ }
+ ASSERT_THAT(ret, SyscallSucceeds());
+
+ // Create a contiguous region of memory of 2*UIO_MAXIOV*PAGE_SIZE. We'll call
+ // sendmsg with a single iov, but the goal is to get the sentry to split this
+ // into > UIO_MAXIOV iovs when calling the kernel.
+ void* ptr;
+ std::vector<Mapping> pages =
+ ASSERT_NO_ERRNO_AND_VALUE(CreateFragmentedRegion(buffer_size, &ptr));
+
+ ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(
+ sockets->second_fd(), reinterpret_cast<char*>(ptr), buffer_size));
+
+ EXPECT_EQ(0, memcmp(write_buf.data(), ptr, buffer_size));
+}
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_non_stream.h b/test/syscalls/linux/socket_unix_non_stream.h
new file mode 100644
index 000000000..e4214d949
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_non_stream.h
@@ -0,0 +1,30 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_NON_STREAM_H_
+#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_NON_STREAM_H_
+
+#include "test/syscalls/linux/socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Test fixture for tests that apply to pairs of connected non-stream
+// unix-domain sockets.
+using UnixNonStreamSocketPairTest = SocketPairTest;
+
+} // namespace testing
+} // namespace gvisor
+
+#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_NON_STREAM_H_
diff --git a/test/syscalls/linux/socket_unix_non_stream_blocking_local.cc b/test/syscalls/linux/socket_unix_non_stream_blocking_local.cc
new file mode 100644
index 000000000..c5d525dde
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_non_stream_blocking_local.cc
@@ -0,0 +1,47 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_non_stream_blocking.h"
+
+#include <vector>
+
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+ return VecCat<SocketPairKind>(
+ ApplyVec<SocketPairKind>(
+ UnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_DGRAM, SOCK_SEQPACKET},
+ List<int>{0, SOCK_CLOEXEC})),
+ ApplyVec<SocketPairKind>(
+ FilesystemBoundUnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_DGRAM, SOCK_SEQPACKET},
+ List<int>{0, SOCK_CLOEXEC})),
+ ApplyVec<SocketPairKind>(
+ AbstractBoundUnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_DGRAM, SOCK_SEQPACKET},
+ List<int>{0, SOCK_CLOEXEC})));
+}
+
+INSTANTIATE_TEST_CASE_P(
+ AllUnixDomainSockets, BlockingNonStreamSocketPairTest,
+ ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_pair.cc b/test/syscalls/linux/socket_unix_pair.cc
new file mode 100644
index 000000000..85dd3711b
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_pair.cc
@@ -0,0 +1,38 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/socket_unix.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+ return VecCat<SocketPairKind>(ApplyVec<SocketPairKind>(
+ UnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET},
+ List<int>{0, SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC})));
+}
+
+INSTANTIATE_TEST_CASE_P(
+ AllUnixDomainSockets, UnixSocketPairTest,
+ ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_pair_nonblock.cc b/test/syscalls/linux/socket_unix_pair_nonblock.cc
new file mode 100644
index 000000000..6a40fe68c
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_pair_nonblock.cc
@@ -0,0 +1,38 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "test/syscalls/linux/socket_non_blocking.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+ return VecCat<SocketPairKind>(ApplyVec<SocketPairKind>(
+ UnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET},
+ List<int>{SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC})));
+}
+
+INSTANTIATE_TEST_CASE_P(
+ AllUnixDomainSockets, NonBlockingSocketPairTest,
+ ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_seqpacket.cc b/test/syscalls/linux/socket_unix_seqpacket.cc
new file mode 100644
index 000000000..ad0af77e9
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_seqpacket.cc
@@ -0,0 +1,49 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_unix_seqpacket.h"
+
+#include <stdio.h>
+#include <sys/un.h>
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST_P(SeqpacketUnixSocketPairTest, WriteOneSideClosed) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds());
+ constexpr char kStr[] = "abc";
+ ASSERT_THAT(write(sockets->second_fd(), kStr, 3),
+ SyscallFailsWithErrno(EPIPE));
+}
+
+TEST_P(SeqpacketUnixSocketPairTest, ReadOneSideClosed) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds());
+ char data[10] = {};
+ ASSERT_THAT(read(sockets->second_fd(), data, sizeof(data)),
+ SyscallSucceedsWithValue(0));
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_seqpacket.h b/test/syscalls/linux/socket_unix_seqpacket.h
new file mode 100644
index 000000000..da8eb2b2b
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_seqpacket.h
@@ -0,0 +1,30 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_SEQPACKET_H_
+#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_SEQPACKET_H_
+
+#include "test/syscalls/linux/socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// Test fixture for tests that apply to pairs of connected seqpacket unix
+// sockets.
+using SeqpacketUnixSocketPairTest = SocketPairTest;
+
+} // namespace testing
+} // namespace gvisor
+
+#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_UNIX_SEQPACKET_H_
diff --git a/test/syscalls/linux/socket_unix_seqpacket_local.cc b/test/syscalls/linux/socket_unix_seqpacket_local.cc
new file mode 100644
index 000000000..f9139a754
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_seqpacket_local.cc
@@ -0,0 +1,59 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "test/syscalls/linux/socket_non_stream.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/socket_unix_non_stream.h"
+#include "test/syscalls/linux/socket_unix_seqpacket.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+ return VecCat<SocketPairKind>(VecCat<SocketPairKind>(
+ ApplyVec<SocketPairKind>(
+ UnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_SEQPACKET},
+ List<int>{0, SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC})),
+ ApplyVec<SocketPairKind>(
+ FilesystemBoundUnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_SEQPACKET},
+ List<int>{0, SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC})),
+ ApplyVec<SocketPairKind>(
+ AbstractBoundUnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_SEQPACKET},
+ List<int>{0, SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC}))));
+}
+
+INSTANTIATE_TEST_CASE_P(
+ AllUnixDomainSockets, NonStreamSocketPairTest,
+ ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+INSTANTIATE_TEST_CASE_P(
+ AllUnixDomainSockets, SeqpacketUnixSocketPairTest,
+ ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+INSTANTIATE_TEST_CASE_P(
+ AllUnixDomainSockets, UnixNonStreamSocketPairTest,
+ ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_stream.cc b/test/syscalls/linux/socket_unix_stream.cc
new file mode 100644
index 000000000..8232c9e35
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_stream.cc
@@ -0,0 +1,69 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdio.h>
+#include <sys/un.h>
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Test fixture for tests that apply to pairs of connected stream unix sockets.
+using StreamUnixSocketPairTest = SocketPairTest;
+
+TEST_P(StreamUnixSocketPairTest, WriteOneSideClosed) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds());
+ constexpr char kStr[] = "abc";
+ ASSERT_THAT(write(sockets->second_fd(), kStr, 3),
+ SyscallFailsWithErrno(EPIPE));
+}
+
+TEST_P(StreamUnixSocketPairTest, ReadOneSideClosed) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds());
+ char data[10] = {};
+ ASSERT_THAT(read(sockets->second_fd(), data, sizeof(data)),
+ SyscallSucceedsWithValue(0));
+}
+
+INSTANTIATE_TEST_CASE_P(
+ AllUnixDomainSockets, StreamUnixSocketPairTest,
+ ::testing::ValuesIn(IncludeReversals(VecCat<SocketPairKind>(
+ ApplyVec<SocketPairKind>(
+ UnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_STREAM},
+ List<int>{0, SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC})),
+ ApplyVec<SocketPairKind>(
+ FilesystemBoundUnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_STREAM},
+ List<int>{0, SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC})),
+ ApplyVec<SocketPairKind>(
+ AbstractBoundUnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_STREAM},
+ List<int>{0, SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC}))))));
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_stream_blocking_local.cc b/test/syscalls/linux/socket_unix_stream_blocking_local.cc
new file mode 100644
index 000000000..1cdeadd27
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_stream_blocking_local.cc
@@ -0,0 +1,47 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/socket_stream_blocking.h"
+
+#include <vector>
+
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+ return VecCat<SocketPairKind>(
+ ApplyVec<SocketPairKind>(
+ UnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_STREAM},
+ List<int>{0, SOCK_CLOEXEC})),
+ ApplyVec<SocketPairKind>(
+ FilesystemBoundUnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_STREAM},
+ List<int>{0, SOCK_CLOEXEC})),
+ ApplyVec<SocketPairKind>(
+ AbstractBoundUnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_STREAM},
+ List<int>{0, SOCK_CLOEXEC})));
+}
+
+INSTANTIATE_TEST_CASE_P(
+ AllUnixDomainSockets, BlockingStreamSocketPairTest,
+ ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_stream_local.cc b/test/syscalls/linux/socket_unix_stream_local.cc
new file mode 100644
index 000000000..9f11e2d49
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_stream_local.cc
@@ -0,0 +1,49 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "test/syscalls/linux/socket_stream.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+ return VecCat<SocketPairKind>(
+ ApplyVec<SocketPairKind>(
+ UnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_STREAM},
+ List<int>{0, SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC})),
+ ApplyVec<SocketPairKind>(
+ FilesystemBoundUnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_STREAM},
+ List<int>{0, SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC})),
+ ApplyVec<SocketPairKind>(
+ AbstractBoundUnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_STREAM},
+ List<int>{0, SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC})));
+}
+
+INSTANTIATE_TEST_CASE_P(
+ AllUnixDomainSockets, StreamSocketPairTest,
+ ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_stream_nonblock_local.cc b/test/syscalls/linux/socket_unix_stream_nonblock_local.cc
new file mode 100644
index 000000000..4c3d3a187
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_stream_nonblock_local.cc
@@ -0,0 +1,49 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "test/syscalls/linux/socket_stream_nonblock.h"
+
+#include <vector>
+
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+std::vector<SocketPairKind> GetSocketPairs() {
+ return VecCat<SocketPairKind>(
+ ApplyVec<SocketPairKind>(
+ UnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_STREAM},
+ List<int>{SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC})),
+ ApplyVec<SocketPairKind>(
+ FilesystemBoundUnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_STREAM},
+ List<int>{SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC})),
+ ApplyVec<SocketPairKind>(
+ AbstractBoundUnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_STREAM},
+ List<int>{SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC})));
+}
+
+INSTANTIATE_TEST_CASE_P(
+ AllUnixDomainSockets, NonBlockingStreamSocketPairTest,
+ ::testing::ValuesIn(IncludeReversals(GetSocketPairs())));
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_unbound_abstract.cc b/test/syscalls/linux/socket_unix_unbound_abstract.cc
new file mode 100644
index 000000000..a35b3b9bd
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_unbound_abstract.cc
@@ -0,0 +1,116 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdio.h>
+#include <sys/un.h>
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Test fixture for tests that apply to pairs of unbound abstract unix sockets.
+using UnboundAbstractUnixSocketPairTest = SocketPairTest;
+
+TEST_P(UnboundAbstractUnixSocketPairTest, AddressAfterNull) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ struct sockaddr_un addr =
+ *reinterpret_cast<const struct sockaddr_un*>(sockets->first_addr());
+ ASSERT_EQ(addr.sun_path[sizeof(addr.sun_path) - 1], 0);
+ SKIP_IF(addr.sun_path[sizeof(addr.sun_path) - 2] != 0 ||
+ addr.sun_path[sizeof(addr.sun_path) - 3] != 0);
+
+ addr.sun_path[sizeof(addr.sun_path) - 2] = 'a';
+
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ ASSERT_THAT(bind(sockets->second_fd(),
+ reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr)),
+ SyscallSucceeds());
+}
+
+TEST_P(UnboundAbstractUnixSocketPairTest, ShortAddressNotExtended) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ struct sockaddr_un addr =
+ *reinterpret_cast<const struct sockaddr_un*>(sockets->first_addr());
+ ASSERT_EQ(addr.sun_path[sizeof(addr.sun_path) - 1], 0);
+
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size() - 1),
+ SyscallSucceeds());
+
+ ASSERT_THAT(bind(sockets->second_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+}
+
+TEST_P(UnboundAbstractUnixSocketPairTest, BindNothing) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ struct sockaddr_un addr = {.sun_family = AF_UNIX};
+ ASSERT_THAT(bind(sockets->first_fd(),
+ reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr)),
+ SyscallSucceeds());
+}
+
+TEST_P(UnboundAbstractUnixSocketPairTest, GetSockNameFullLength) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ sockaddr_storage addr = {};
+ socklen_t addr_len = sizeof(addr);
+ ASSERT_THAT(getsockname(sockets->first_fd(),
+ reinterpret_cast<struct sockaddr*>(&addr), &addr_len),
+ SyscallSucceeds());
+ EXPECT_EQ(addr_len, sockets->first_addr_size());
+}
+
+TEST_P(UnboundAbstractUnixSocketPairTest, GetSockNamePartialLength) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size() - 1),
+ SyscallSucceeds());
+
+ sockaddr_storage addr = {};
+ socklen_t addr_len = sizeof(addr);
+ ASSERT_THAT(getsockname(sockets->first_fd(),
+ reinterpret_cast<struct sockaddr*>(&addr), &addr_len),
+ SyscallSucceeds());
+ EXPECT_EQ(addr_len, sockets->first_addr_size() - 1);
+}
+
+INSTANTIATE_TEST_CASE_P(
+ AllUnixDomainSockets, UnboundAbstractUnixSocketPairTest,
+ ::testing::ValuesIn(ApplyVec<SocketPairKind>(
+ AbstractUnboundUnixDomainSocketPair,
+ AllBitwiseCombinations(
+ List<int>{SOCK_STREAM, SOCK_SEQPACKET, SOCK_DGRAM},
+ List<int>{0, SOCK_NONBLOCK}, List<int>{0, SOCK_CLOEXEC}))));
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_unbound_dgram.cc b/test/syscalls/linux/socket_unix_unbound_dgram.cc
new file mode 100644
index 000000000..a01b7f644
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_unbound_dgram.cc
@@ -0,0 +1,162 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdio.h>
+#include <sys/un.h>
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Test fixture for tests that apply to pairs of unbound dgram unix sockets.
+using UnboundDgramUnixSocketPairTest = SocketPairTest;
+
+TEST_P(UnboundDgramUnixSocketPairTest, BindConnect) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+ ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+}
+
+TEST_P(UnboundDgramUnixSocketPairTest, SelfConnect) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+ ASSERT_THAT(connect(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+}
+
+TEST_P(UnboundDgramUnixSocketPairTest, DoubleConnect) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+ ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+ ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+}
+
+TEST_P(UnboundDgramUnixSocketPairTest, GetRemoteAddress) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+ ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ socklen_t addressLength = sockets->first_addr_size();
+ struct sockaddr_storage address = {};
+ ASSERT_THAT(getpeername(sockets->second_fd(), (struct sockaddr*)(&address),
+ &addressLength),
+ SyscallSucceeds());
+ EXPECT_EQ(
+ 0, memcmp(&address, sockets->first_addr(), sockets->first_addr_size()));
+}
+
+TEST_P(UnboundDgramUnixSocketPairTest, Sendto) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ char sent_data[20];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ ASSERT_THAT(sendto(sockets->second_fd(), sent_data, sizeof(sent_data), 0,
+ sockets->first_addr(), sockets->first_addr_size()),
+ SyscallSucceedsWithValue(sizeof(sent_data)));
+
+ char received_data[sizeof(sent_data)];
+ ASSERT_THAT(ReadFd(sockets->first_fd(), received_data, sizeof(received_data)),
+ SyscallSucceedsWithValue(sizeof(received_data)));
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(received_data)));
+}
+
+TEST_P(UnboundDgramUnixSocketPairTest, ZeroWriteAllowed) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+ ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ char sent_data[3];
+ // Send a zero length packet.
+ ASSERT_THAT(write(sockets->second_fd(), sent_data, 0),
+ SyscallSucceedsWithValue(0));
+ // Receive the packet.
+ char received_data[sizeof(sent_data)];
+ ASSERT_THAT(read(sockets->first_fd(), received_data, sizeof(received_data)),
+ SyscallSucceedsWithValue(0));
+}
+
+TEST_P(UnboundDgramUnixSocketPairTest, Listen) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ ASSERT_THAT(listen(sockets->first_fd(), 0), SyscallFailsWithErrno(ENOTSUP));
+}
+
+TEST_P(UnboundDgramUnixSocketPairTest, Accept) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ ASSERT_THAT(accept(sockets->first_fd(), nullptr, nullptr),
+ SyscallFailsWithErrno(ENOTSUP));
+}
+
+TEST_P(UnboundDgramUnixSocketPairTest, SendtoWithoutConnect) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ char data = 'a';
+ ASSERT_THAT(
+ RetryEINTR(sendto)(sockets->second_fd(), &data, sizeof(data), 0,
+ sockets->first_addr(), sockets->first_addr_size()),
+ SyscallSucceedsWithValue(sizeof(data)));
+}
+
+INSTANTIATE_TEST_CASE_P(
+ AllUnixDomainSockets, UnboundDgramUnixSocketPairTest,
+ ::testing::ValuesIn(VecCat<SocketPairKind>(
+ ApplyVec<SocketPairKind>(
+ FilesystemUnboundUnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_DGRAM},
+ List<int>{0, SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC})),
+ ApplyVec<SocketPairKind>(
+ AbstractUnboundUnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_DGRAM},
+ List<int>{0, SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC})))));
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_unbound_filesystem.cc b/test/syscalls/linux/socket_unix_unbound_filesystem.cc
new file mode 100644
index 000000000..56d882643
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_unbound_filesystem.cc
@@ -0,0 +1,84 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdio.h>
+#include <sys/un.h>
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Test fixture for tests that apply to pairs of unbound filesystem unix
+// sockets.
+using UnboundFilesystemUnixSocketPairTest = SocketPairTest;
+
+TEST_P(UnboundFilesystemUnixSocketPairTest, AddressAfterNull) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ struct sockaddr_un addr =
+ *reinterpret_cast<const struct sockaddr_un*>(sockets->first_addr());
+ ASSERT_EQ(addr.sun_path[sizeof(addr.sun_path) - 1], 0);
+ SKIP_IF(addr.sun_path[sizeof(addr.sun_path) - 2] != 0 ||
+ addr.sun_path[sizeof(addr.sun_path) - 3] != 0);
+
+ addr.sun_path[sizeof(addr.sun_path) - 2] = 'a';
+
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ ASSERT_THAT(bind(sockets->second_fd(),
+ reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr)),
+ SyscallFailsWithErrno(EADDRINUSE));
+}
+
+TEST_P(UnboundFilesystemUnixSocketPairTest, GetSockNameLength) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ sockaddr_storage got_addr = {};
+ socklen_t got_addr_len = sizeof(got_addr);
+ ASSERT_THAT(
+ getsockname(sockets->first_fd(),
+ reinterpret_cast<struct sockaddr*>(&got_addr), &got_addr_len),
+ SyscallSucceeds());
+
+ sockaddr_un want_addr =
+ *reinterpret_cast<const struct sockaddr_un*>(sockets->first_addr());
+
+ EXPECT_EQ(got_addr_len,
+ strlen(want_addr.sun_path) + 1 + sizeof(want_addr.sun_family));
+}
+
+INSTANTIATE_TEST_CASE_P(
+ AllUnixDomainSockets, UnboundFilesystemUnixSocketPairTest,
+ ::testing::ValuesIn(ApplyVec<SocketPairKind>(
+ FilesystemUnboundUnixDomainSocketPair,
+ AllBitwiseCombinations(
+ List<int>{SOCK_STREAM, SOCK_SEQPACKET, SOCK_DGRAM},
+ List<int>{0, SOCK_NONBLOCK}, List<int>{0, SOCK_CLOEXEC}))));
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_unbound_seqpacket.cc b/test/syscalls/linux/socket_unix_unbound_seqpacket.cc
new file mode 100644
index 000000000..fa3b99490
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_unbound_seqpacket.cc
@@ -0,0 +1,91 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdio.h>
+#include <sys/un.h>
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Test fixture for tests that apply to pairs of unbound seqpacket unix sockets.
+using UnboundUnixSeqpacketSocketPairTest = SocketPairTest;
+
+TEST_P(UnboundUnixSeqpacketSocketPairTest, SendtoWithoutConnect) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ char data = 'a';
+ ASSERT_THAT(sendto(sockets->second_fd(), &data, sizeof(data), 0,
+ sockets->first_addr(), sockets->first_addr_size()),
+ SyscallFailsWithErrno(ENOTCONN));
+}
+
+TEST_P(UnboundUnixSeqpacketSocketPairTest, SendtoWithoutConnectIgnoresAddr) {
+ // FIXME: gVisor tries to find /foo/bar and thus returns ENOENT.
+ if (IsRunningOnGvisor()) {
+ return;
+ }
+
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ // Even a bogus address is completely ignored.
+ constexpr char kPath[] = "/foo/bar";
+
+ // Sanity check that kPath doesn't exist.
+ struct stat s;
+ ASSERT_THAT(stat(kPath, &s), SyscallFailsWithErrno(ENOENT));
+
+ struct sockaddr_un addr = {};
+ addr.sun_family = AF_UNIX;
+ memcpy(addr.sun_path, kPath, sizeof(kPath));
+
+ char data = 'a';
+ ASSERT_THAT(
+ sendto(sockets->second_fd(), &data, sizeof(data), 0,
+ reinterpret_cast<const struct sockaddr*>(&addr), sizeof(addr)),
+ SyscallFailsWithErrno(ENOTCONN));
+}
+
+INSTANTIATE_TEST_CASE_P(
+ AllUnixDomainSockets, UnboundUnixSeqpacketSocketPairTest,
+ ::testing::ValuesIn(IncludeReversals(VecCat<SocketPairKind>(
+ ApplyVec<SocketPairKind>(
+ FilesystemUnboundUnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_SEQPACKET},
+ List<int>{0, SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC})),
+ ApplyVec<SocketPairKind>(
+ AbstractUnboundUnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_SEQPACKET},
+ List<int>{0, SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC}))))));
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/socket_unix_unbound_stream.cc b/test/syscalls/linux/socket_unix_unbound_stream.cc
new file mode 100644
index 000000000..99636b221
--- /dev/null
+++ b/test/syscalls/linux/socket_unix_unbound_stream.cc
@@ -0,0 +1,738 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdio.h>
+#include <sys/un.h>
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Test fixture for tests that apply to pairs of connected unix stream sockets.
+using UnixStreamSocketPairTest = SocketPairTest;
+
+// FDPassPartialRead checks that sent control messages cannot be read after
+// any of their assocated data has been read while ignoring the control message
+// by using read(2) instead of recvmsg(2).
+TEST_P(UnixStreamSocketPairTest, FDPassPartialRead) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data[20];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ auto pair =
+ ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+ ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(),
+ sent_data, sizeof(sent_data)));
+
+ char received_data[sizeof(sent_data) / 2];
+ ASSERT_THAT(
+ ReadFd(sockets->second_fd(), received_data, sizeof(received_data)),
+ SyscallSucceedsWithValue(sizeof(received_data)));
+
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(received_data)));
+
+ RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data));
+ EXPECT_EQ(0, memcmp(sent_data + sizeof(received_data), received_data,
+ sizeof(received_data)));
+}
+
+TEST_P(UnixStreamSocketPairTest, FDPassCoalescedRead) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data1[20];
+ RandomizeBuffer(sent_data1, sizeof(sent_data1));
+
+ auto pair1 =
+ ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+ ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair1->second_fd(),
+ sent_data1, sizeof(sent_data1)));
+
+ char sent_data2[20];
+ RandomizeBuffer(sent_data2, sizeof(sent_data2));
+
+ auto pair2 =
+ ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+ ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair2->second_fd(),
+ sent_data2, sizeof(sent_data2)));
+
+ char received_data[sizeof(sent_data1) + sizeof(sent_data2)];
+ ASSERT_THAT(
+ ReadFd(sockets->second_fd(), received_data, sizeof(received_data)),
+ SyscallSucceedsWithValue(sizeof(received_data)));
+
+ EXPECT_EQ(0, memcmp(sent_data1, received_data, sizeof(sent_data1)));
+ EXPECT_EQ(0, memcmp(sent_data2, received_data + sizeof(sent_data1),
+ sizeof(sent_data2)));
+}
+
+// ZeroLengthMessageFDDiscarded checks that control messages associated with
+// zero length messages are discarded.
+TEST_P(UnixStreamSocketPairTest, ZeroLengthMessageFDDiscarded) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ // Zero length arrays are invalid in ISO C++, so allocate one of size 1 and
+ // send a length of 0.
+ char sent_data1[1] = {};
+
+ auto pair =
+ ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+ ASSERT_NO_FATAL_FAILURE(
+ SendSingleFD(sockets->first_fd(), pair->second_fd(), sent_data1, 0));
+
+ char sent_data2[20];
+ RandomizeBuffer(sent_data2, sizeof(sent_data2));
+
+ ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data2, sizeof(sent_data2)),
+ SyscallSucceedsWithValue(sizeof(sent_data2)));
+
+ char received_data[sizeof(sent_data2)] = {};
+
+ RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data));
+ EXPECT_EQ(0, memcmp(sent_data2, received_data, sizeof(received_data)));
+}
+
+// FDPassCoalescedRecv checks that control messages not in the first message are
+// preserved in a coalesced recv.
+TEST_P(UnixStreamSocketPairTest, FDPassCoalescedRecv) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data[20];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data, sizeof(sent_data) / 2),
+ SyscallSucceedsWithValue(sizeof(sent_data) / 2));
+
+ auto pair =
+ ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+ ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(),
+ sent_data + sizeof(sent_data) / 2,
+ sizeof(sent_data) / 2));
+
+ char received_data[sizeof(sent_data)];
+
+ int fd = -1;
+ ASSERT_NO_FATAL_FAILURE(RecvSingleFD(sockets->second_fd(), &fd, received_data,
+ sizeof(received_data)));
+
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+
+ ASSERT_NO_FATAL_FAILURE(TransferTest(fd, pair->first_fd()));
+}
+
+// ReadsNotCoalescedAfterFDPass checks that messages after a message containing
+// an FD control message are not coalesced.
+TEST_P(UnixStreamSocketPairTest, ReadsNotCoalescedAfterFDPass) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data[20];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ auto pair =
+ ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+ ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair->second_fd(),
+ sent_data, sizeof(sent_data) / 2));
+
+ ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data + sizeof(sent_data) / 2,
+ sizeof(sent_data) / 2),
+ SyscallSucceedsWithValue(sizeof(sent_data) / 2));
+
+ char received_data[sizeof(sent_data)];
+
+ int fd = -1;
+ ASSERT_NO_FATAL_FAILURE(RecvSingleFD(sockets->second_fd(), &fd, received_data,
+ sizeof(received_data),
+ sizeof(sent_data) / 2));
+
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data) / 2));
+
+ ASSERT_NO_FATAL_FAILURE(TransferTest(fd, pair->first_fd()));
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+
+ ASSERT_NO_FATAL_FAILURE(
+ RecvNoCmsg(sockets->second_fd(), received_data, sizeof(sent_data) / 2));
+
+ EXPECT_EQ(0, memcmp(sent_data + sizeof(sent_data) / 2, received_data,
+ sizeof(sent_data) / 2));
+}
+
+// FDPassNotCombined checks that FD control messages are not combined in a
+// coalesced read.
+TEST_P(UnixStreamSocketPairTest, FDPassNotCombined) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data[20];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ auto pair1 =
+ ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+ ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair1->second_fd(),
+ sent_data, sizeof(sent_data) / 2));
+
+ auto pair2 =
+ ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+ ASSERT_NO_FATAL_FAILURE(SendSingleFD(sockets->first_fd(), pair2->second_fd(),
+ sent_data + sizeof(sent_data) / 2,
+ sizeof(sent_data) / 2));
+
+ char received_data[sizeof(sent_data)];
+
+ int fd = -1;
+ ASSERT_NO_FATAL_FAILURE(RecvSingleFD(sockets->second_fd(), &fd, received_data,
+ sizeof(received_data),
+ sizeof(sent_data) / 2));
+
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data) / 2));
+
+ ASSERT_NO_FATAL_FAILURE(TransferTest(fd, pair1->first_fd()));
+
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+ fd = -1;
+
+ ASSERT_NO_FATAL_FAILURE(RecvSingleFD(sockets->second_fd(), &fd, received_data,
+ sizeof(received_data),
+ sizeof(sent_data) / 2));
+
+ EXPECT_EQ(0, memcmp(sent_data + sizeof(sent_data) / 2, received_data,
+ sizeof(sent_data) / 2));
+
+ ASSERT_NO_FATAL_FAILURE(TransferTest(fd, pair2->first_fd()));
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+}
+
+TEST_P(UnixStreamSocketPairTest, CredPassPartialRead) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data[20];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+
+ struct ucred sent_creds;
+
+ ASSERT_THAT(sent_creds.pid = getpid(), SyscallSucceeds());
+ ASSERT_THAT(sent_creds.uid = getuid(), SyscallSucceeds());
+ ASSERT_THAT(sent_creds.gid = getgid(), SyscallSucceeds());
+
+ ASSERT_NO_FATAL_FAILURE(
+ SendCreds(sockets->first_fd(), sent_creds, sent_data, sizeof(sent_data)));
+
+ int one = 1;
+ ASSERT_THAT(setsockopt(sockets->second_fd(), SOL_SOCKET, SO_PASSCRED, &one,
+ sizeof(one)),
+ SyscallSucceeds());
+
+ for (int i = 0; i < 2; i++) {
+ char received_data[10];
+ struct ucred received_creds;
+ ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds,
+ received_data, sizeof(received_data),
+ sizeof(received_data)));
+
+ EXPECT_EQ(0, memcmp(sent_data + i * sizeof(received_data), received_data,
+ sizeof(received_data)));
+ EXPECT_EQ(sent_creds.pid, received_creds.pid);
+ EXPECT_EQ(sent_creds.uid, received_creds.uid);
+ EXPECT_EQ(sent_creds.gid, received_creds.gid);
+ }
+}
+
+// Unix stream sockets peek in the same way as datagram sockets.
+//
+// SinglePeek checks that only a single message is peekable in a single recv.
+TEST_P(UnixStreamSocketPairTest, SinglePeek) {
+ if (!IsRunningOnGvisor()) {
+ // Don't run this test on linux kernels newer than 4.3.x Linux kernel commit
+ // 9f389e35674f5b086edd70ed524ca0f287259725 which changes this behavior. We
+ // used to target 3.11 compatibility, so disable this test on newer kernels.
+ //
+ // NOTE: Bring this up to Linux 4.4 compatibility.
+ auto version = ASSERT_NO_ERRNO_AND_VALUE(GetKernelVersion());
+ SKIP_IF(version.major > 4 || (version.major == 4 && version.minor >= 3));
+ }
+
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ char sent_data[40];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+ ASSERT_THAT(RetryEINTR(send)(sockets->first_fd(), sent_data,
+ sizeof(sent_data) / 2, 0),
+ SyscallSucceedsWithValue(sizeof(sent_data) / 2));
+ ASSERT_THAT(
+ RetryEINTR(send)(sockets->first_fd(), sent_data + sizeof(sent_data) / 2,
+ sizeof(sent_data) / 2, 0),
+ SyscallSucceedsWithValue(sizeof(sent_data) / 2));
+ char received_data[sizeof(sent_data)];
+ for (int i = 0; i < 3; i++) {
+ memset(received_data, 0, sizeof(received_data));
+ ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+ sizeof(received_data), MSG_PEEK),
+ SyscallSucceedsWithValue(sizeof(sent_data) / 2));
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data) / 2));
+ }
+ memset(received_data, 0, sizeof(received_data));
+ ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+ sizeof(sent_data) / 2, 0),
+ SyscallSucceedsWithValue(sizeof(sent_data) / 2));
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data) / 2));
+ memset(received_data, 0, sizeof(received_data));
+ ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), received_data,
+ sizeof(sent_data) / 2, 0),
+ SyscallSucceedsWithValue(sizeof(sent_data) / 2));
+ EXPECT_EQ(0, memcmp(sent_data + sizeof(sent_data) / 2, received_data,
+ sizeof(sent_data) / 2));
+}
+
+TEST_P(UnixStreamSocketPairTest, CredsNotCoalescedUp) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data1[20];
+ RandomizeBuffer(sent_data1, sizeof(sent_data1));
+
+ ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data1, sizeof(sent_data1)),
+ SyscallSucceedsWithValue(sizeof(sent_data1)));
+
+ SetSoPassCred(sockets->second_fd());
+
+ char sent_data2[20];
+ RandomizeBuffer(sent_data2, sizeof(sent_data2));
+
+ ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data2, sizeof(sent_data2)),
+ SyscallSucceedsWithValue(sizeof(sent_data2)));
+
+ char received_data[sizeof(sent_data1) + sizeof(sent_data2)];
+
+ struct ucred received_creds;
+ ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds,
+ received_data, sizeof(received_data),
+ sizeof(sent_data1)));
+
+ EXPECT_EQ(0, memcmp(sent_data1, received_data, sizeof(sent_data1)));
+
+ struct ucred want_creds {
+ 0, 65534, 65534
+ };
+
+ EXPECT_EQ(want_creds.pid, received_creds.pid);
+ EXPECT_EQ(want_creds.uid, received_creds.uid);
+ EXPECT_EQ(want_creds.gid, received_creds.gid);
+
+ ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds,
+ received_data, sizeof(received_data),
+ sizeof(sent_data2)));
+
+ EXPECT_EQ(0, memcmp(sent_data2, received_data, sizeof(sent_data2)));
+
+ ASSERT_THAT(want_creds.pid = getpid(), SyscallSucceeds());
+ ASSERT_THAT(want_creds.uid = getuid(), SyscallSucceeds());
+ ASSERT_THAT(want_creds.gid = getgid(), SyscallSucceeds());
+
+ EXPECT_EQ(want_creds.pid, received_creds.pid);
+ EXPECT_EQ(want_creds.uid, received_creds.uid);
+ EXPECT_EQ(want_creds.gid, received_creds.gid);
+}
+
+TEST_P(UnixStreamSocketPairTest, CredsNotCoalescedDown) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ SetSoPassCred(sockets->second_fd());
+
+ char sent_data1[20];
+ RandomizeBuffer(sent_data1, sizeof(sent_data1));
+
+ ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data1, sizeof(sent_data1)),
+ SyscallSucceedsWithValue(sizeof(sent_data1)));
+
+ UnsetSoPassCred(sockets->second_fd());
+
+ char sent_data2[20];
+ RandomizeBuffer(sent_data2, sizeof(sent_data2));
+
+ ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data2, sizeof(sent_data2)),
+ SyscallSucceedsWithValue(sizeof(sent_data2)));
+
+ SetSoPassCred(sockets->second_fd());
+
+ char received_data[sizeof(sent_data1) + sizeof(sent_data2)];
+ struct ucred received_creds;
+
+ ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds,
+ received_data, sizeof(received_data),
+ sizeof(sent_data1)));
+
+ EXPECT_EQ(0, memcmp(sent_data1, received_data, sizeof(sent_data1)));
+
+ struct ucred want_creds;
+ ASSERT_THAT(want_creds.pid = getpid(), SyscallSucceeds());
+ ASSERT_THAT(want_creds.uid = getuid(), SyscallSucceeds());
+ ASSERT_THAT(want_creds.gid = getgid(), SyscallSucceeds());
+
+ EXPECT_EQ(want_creds.pid, received_creds.pid);
+ EXPECT_EQ(want_creds.uid, received_creds.uid);
+ EXPECT_EQ(want_creds.gid, received_creds.gid);
+
+ ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds,
+ received_data, sizeof(received_data),
+ sizeof(sent_data2)));
+
+ EXPECT_EQ(0, memcmp(sent_data2, received_data, sizeof(sent_data2)));
+
+ want_creds = {0, 65534, 65534};
+
+ EXPECT_EQ(want_creds.pid, received_creds.pid);
+ EXPECT_EQ(want_creds.uid, received_creds.uid);
+ EXPECT_EQ(want_creds.gid, received_creds.gid);
+}
+
+TEST_P(UnixStreamSocketPairTest, CoalescedCredsNoPasscred) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ SetSoPassCred(sockets->second_fd());
+
+ char sent_data1[20];
+ RandomizeBuffer(sent_data1, sizeof(sent_data1));
+
+ ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data1, sizeof(sent_data1)),
+ SyscallSucceedsWithValue(sizeof(sent_data1)));
+
+ UnsetSoPassCred(sockets->second_fd());
+
+ char sent_data2[20];
+ RandomizeBuffer(sent_data2, sizeof(sent_data2));
+
+ ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data2, sizeof(sent_data2)),
+ SyscallSucceedsWithValue(sizeof(sent_data2)));
+
+ char received_data[sizeof(sent_data1) + sizeof(sent_data2)];
+
+ ASSERT_NO_FATAL_FAILURE(
+ RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data)));
+
+ EXPECT_EQ(0, memcmp(sent_data1, received_data, sizeof(sent_data1)));
+ EXPECT_EQ(0, memcmp(sent_data2, received_data + sizeof(sent_data1),
+ sizeof(sent_data2)));
+}
+
+TEST_P(UnixStreamSocketPairTest, CoalescedCreds1) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data1[20];
+ RandomizeBuffer(sent_data1, sizeof(sent_data1));
+
+ ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data1, sizeof(sent_data1)),
+ SyscallSucceedsWithValue(sizeof(sent_data1)));
+
+ char sent_data2[20];
+ RandomizeBuffer(sent_data2, sizeof(sent_data2));
+
+ ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data2, sizeof(sent_data2)),
+ SyscallSucceedsWithValue(sizeof(sent_data2)));
+
+ SetSoPassCred(sockets->second_fd());
+
+ char received_data[sizeof(sent_data1) + sizeof(sent_data2)];
+ struct ucred received_creds;
+
+ ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds,
+ received_data, sizeof(received_data)));
+
+ EXPECT_EQ(0, memcmp(sent_data1, received_data, sizeof(sent_data1)));
+ EXPECT_EQ(0, memcmp(sent_data2, received_data + sizeof(sent_data1),
+ sizeof(sent_data2)));
+
+ struct ucred want_creds {
+ 0, 65534, 65534
+ };
+
+ EXPECT_EQ(want_creds.pid, received_creds.pid);
+ EXPECT_EQ(want_creds.uid, received_creds.uid);
+ EXPECT_EQ(want_creds.gid, received_creds.gid);
+}
+
+TEST_P(UnixStreamSocketPairTest, CoalescedCreds2) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ SetSoPassCred(sockets->second_fd());
+
+ char sent_data1[20];
+ RandomizeBuffer(sent_data1, sizeof(sent_data1));
+
+ ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data1, sizeof(sent_data1)),
+ SyscallSucceedsWithValue(sizeof(sent_data1)));
+
+ char sent_data2[20];
+ RandomizeBuffer(sent_data2, sizeof(sent_data2));
+
+ ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data2, sizeof(sent_data2)),
+ SyscallSucceedsWithValue(sizeof(sent_data2)));
+
+ char received_data[sizeof(sent_data1) + sizeof(sent_data2)];
+ struct ucred received_creds;
+
+ ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds,
+ received_data, sizeof(received_data)));
+
+ EXPECT_EQ(0, memcmp(sent_data1, received_data, sizeof(sent_data1)));
+ EXPECT_EQ(0, memcmp(sent_data2, received_data + sizeof(sent_data1),
+ sizeof(sent_data2)));
+
+ struct ucred want_creds;
+ ASSERT_THAT(want_creds.pid = getpid(), SyscallSucceeds());
+ ASSERT_THAT(want_creds.uid = getuid(), SyscallSucceeds());
+ ASSERT_THAT(want_creds.gid = getgid(), SyscallSucceeds());
+
+ EXPECT_EQ(want_creds.pid, received_creds.pid);
+ EXPECT_EQ(want_creds.uid, received_creds.uid);
+ EXPECT_EQ(want_creds.gid, received_creds.gid);
+}
+
+TEST_P(UnixStreamSocketPairTest, NonCoalescedDifferingCreds1) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ char sent_data1[20];
+ RandomizeBuffer(sent_data1, sizeof(sent_data1));
+
+ ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data1, sizeof(sent_data1)),
+ SyscallSucceedsWithValue(sizeof(sent_data1)));
+
+ SetSoPassCred(sockets->second_fd());
+
+ char sent_data2[20];
+ RandomizeBuffer(sent_data2, sizeof(sent_data2));
+
+ ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data2, sizeof(sent_data2)),
+ SyscallSucceedsWithValue(sizeof(sent_data2)));
+
+ char received_data1[sizeof(sent_data1) + sizeof(sent_data2)];
+ struct ucred received_creds1;
+
+ ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds1,
+ received_data1, sizeof(sent_data1)));
+
+ EXPECT_EQ(0, memcmp(sent_data1, received_data1, sizeof(sent_data1)));
+
+ struct ucred want_creds1 {
+ 0, 65534, 65534
+ };
+
+ EXPECT_EQ(want_creds1.pid, received_creds1.pid);
+ EXPECT_EQ(want_creds1.uid, received_creds1.uid);
+ EXPECT_EQ(want_creds1.gid, received_creds1.gid);
+
+ char received_data2[sizeof(sent_data1) + sizeof(sent_data2)];
+ struct ucred received_creds2;
+
+ ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds2,
+ received_data2, sizeof(sent_data2)));
+
+ EXPECT_EQ(0, memcmp(sent_data2, received_data2, sizeof(sent_data2)));
+
+ struct ucred want_creds2;
+ ASSERT_THAT(want_creds2.pid = getpid(), SyscallSucceeds());
+ ASSERT_THAT(want_creds2.uid = getuid(), SyscallSucceeds());
+ ASSERT_THAT(want_creds2.gid = getgid(), SyscallSucceeds());
+
+ EXPECT_EQ(want_creds2.pid, received_creds2.pid);
+ EXPECT_EQ(want_creds2.uid, received_creds2.uid);
+ EXPECT_EQ(want_creds2.gid, received_creds2.gid);
+}
+
+TEST_P(UnixStreamSocketPairTest, NonCoalescedDifferingCreds2) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ SetSoPassCred(sockets->second_fd());
+
+ char sent_data1[20];
+ RandomizeBuffer(sent_data1, sizeof(sent_data1));
+
+ ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data1, sizeof(sent_data1)),
+ SyscallSucceedsWithValue(sizeof(sent_data1)));
+
+ UnsetSoPassCred(sockets->second_fd());
+
+ char sent_data2[20];
+ RandomizeBuffer(sent_data2, sizeof(sent_data2));
+
+ ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data2, sizeof(sent_data2)),
+ SyscallSucceedsWithValue(sizeof(sent_data2)));
+
+ SetSoPassCred(sockets->second_fd());
+
+ char received_data1[sizeof(sent_data1) + sizeof(sent_data2)];
+ struct ucred received_creds1;
+
+ ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds1,
+ received_data1, sizeof(sent_data1)));
+
+ EXPECT_EQ(0, memcmp(sent_data1, received_data1, sizeof(sent_data1)));
+
+ struct ucred want_creds1;
+ ASSERT_THAT(want_creds1.pid = getpid(), SyscallSucceeds());
+ ASSERT_THAT(want_creds1.uid = getuid(), SyscallSucceeds());
+ ASSERT_THAT(want_creds1.gid = getgid(), SyscallSucceeds());
+
+ EXPECT_EQ(want_creds1.pid, received_creds1.pid);
+ EXPECT_EQ(want_creds1.uid, received_creds1.uid);
+ EXPECT_EQ(want_creds1.gid, received_creds1.gid);
+
+ char received_data2[sizeof(sent_data1) + sizeof(sent_data2)];
+ struct ucred received_creds2;
+
+ ASSERT_NO_FATAL_FAILURE(RecvCreds(sockets->second_fd(), &received_creds2,
+ received_data2, sizeof(sent_data2)));
+
+ EXPECT_EQ(0, memcmp(sent_data2, received_data2, sizeof(sent_data2)));
+
+ struct ucred want_creds2 {
+ 0, 65534, 65534
+ };
+
+ EXPECT_EQ(want_creds2.pid, received_creds2.pid);
+ EXPECT_EQ(want_creds2.uid, received_creds2.uid);
+ EXPECT_EQ(want_creds2.gid, received_creds2.gid);
+}
+
+TEST_P(UnixStreamSocketPairTest, CoalescedDifferingCreds) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ SetSoPassCred(sockets->second_fd());
+
+ char sent_data1[20];
+ RandomizeBuffer(sent_data1, sizeof(sent_data1));
+
+ ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data1, sizeof(sent_data1)),
+ SyscallSucceedsWithValue(sizeof(sent_data1)));
+
+ char sent_data2[20];
+ RandomizeBuffer(sent_data2, sizeof(sent_data2));
+
+ ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data2, sizeof(sent_data2)),
+ SyscallSucceedsWithValue(sizeof(sent_data2)));
+
+ UnsetSoPassCred(sockets->second_fd());
+
+ char sent_data3[20];
+ RandomizeBuffer(sent_data3, sizeof(sent_data3));
+
+ ASSERT_THAT(WriteFd(sockets->first_fd(), sent_data3, sizeof(sent_data3)),
+ SyscallSucceedsWithValue(sizeof(sent_data3)));
+
+ char received_data[sizeof(sent_data1) + sizeof(sent_data2) +
+ sizeof(sent_data3)];
+
+ ASSERT_NO_FATAL_FAILURE(
+ RecvNoCmsg(sockets->second_fd(), received_data, sizeof(received_data)));
+
+ EXPECT_EQ(0, memcmp(sent_data1, received_data, sizeof(sent_data1)));
+ EXPECT_EQ(0, memcmp(sent_data2, received_data + sizeof(sent_data1),
+ sizeof(sent_data2)));
+ EXPECT_EQ(0, memcmp(sent_data3,
+ received_data + sizeof(sent_data1) + sizeof(sent_data2),
+ sizeof(sent_data3)));
+}
+
+INSTANTIATE_TEST_CASE_P(
+ AllUnixDomainSockets, UnixStreamSocketPairTest,
+ ::testing::ValuesIn(IncludeReversals(VecCat<SocketPairKind>(
+ ApplyVec<SocketPairKind>(
+ UnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_STREAM},
+ List<int>{0, SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC})),
+ ApplyVec<SocketPairKind>(
+ FilesystemBoundUnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_STREAM},
+ List<int>{0, SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC})),
+ ApplyVec<SocketPairKind>(
+ AbstractBoundUnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_STREAM},
+ List<int>{0, SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC}))))));
+
+// Test fixture for tests that apply to pairs of unbound unix stream sockets.
+using UnboundUnixStreamSocketPairTest = SocketPairTest;
+
+TEST_P(UnboundUnixStreamSocketPairTest, SendtoWithoutConnect) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ char data = 'a';
+ ASSERT_THAT(sendto(sockets->second_fd(), &data, sizeof(data), 0,
+ sockets->first_addr(), sockets->first_addr_size()),
+ SyscallFailsWithErrno(EOPNOTSUPP));
+}
+
+TEST_P(UnboundUnixStreamSocketPairTest, SendtoWithoutConnectIgnoresAddr) {
+ // FIXME: gVisor tries to find /foo/bar and thus returns ENOENT.
+ if (IsRunningOnGvisor()) {
+ return;
+ }
+
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+
+ ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(),
+ sockets->first_addr_size()),
+ SyscallSucceeds());
+
+ // Even a bogus address is completely ignored.
+ constexpr char kPath[] = "/foo/bar";
+
+ // Sanity check that kPath doesn't exist.
+ struct stat s;
+ ASSERT_THAT(stat(kPath, &s), SyscallFailsWithErrno(ENOENT));
+
+ struct sockaddr_un addr = {};
+ addr.sun_family = AF_UNIX;
+ memcpy(addr.sun_path, kPath, sizeof(kPath));
+
+ char data = 'a';
+ ASSERT_THAT(
+ sendto(sockets->second_fd(), &data, sizeof(data), 0,
+ reinterpret_cast<const struct sockaddr*>(&addr), sizeof(addr)),
+ SyscallFailsWithErrno(EOPNOTSUPP));
+}
+
+INSTANTIATE_TEST_CASE_P(
+ AllUnixDomainSockets, UnboundUnixStreamSocketPairTest,
+ ::testing::ValuesIn(IncludeReversals(VecCat<SocketPairKind>(
+ ApplyVec<SocketPairKind>(
+ FilesystemUnboundUnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_STREAM},
+ List<int>{0, SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC})),
+ ApplyVec<SocketPairKind>(
+ AbstractUnboundUnixDomainSocketPair,
+ AllBitwiseCombinations(List<int>{SOCK_STREAM},
+ List<int>{0, SOCK_NONBLOCK},
+ List<int>{0, SOCK_CLOEXEC}))))));
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/stat.cc b/test/syscalls/linux/stat.cc
new file mode 100644
index 000000000..aea19dbff
--- /dev/null
+++ b/test/syscalls/linux/stat.cc
@@ -0,0 +1,410 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/statfs.h>
+#include <unistd.h>
+#include <string>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "absl/strings/match.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/string_view.h"
+#include "test/syscalls/linux/file_base.h"
+#include "test/util/cleanup.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+class StatTest : public FileTest {};
+
+TEST_F(StatTest, FstatatAbs) {
+ struct stat st;
+
+ // Check that the stat works.
+ EXPECT_THAT(fstatat(AT_FDCWD, test_file_name_.c_str(), &st, 0),
+ SyscallSucceeds());
+ EXPECT_TRUE(S_ISREG(st.st_mode));
+}
+
+TEST_F(StatTest, FstatatEmptyPath) {
+ struct stat st;
+ const auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDONLY));
+
+ // Check that the stat works.
+ EXPECT_THAT(fstatat(fd.get(), "", &st, AT_EMPTY_PATH), SyscallSucceeds());
+ EXPECT_TRUE(S_ISREG(st.st_mode));
+}
+
+TEST_F(StatTest, FstatatRel) {
+ struct stat st;
+ int dirfd;
+ auto filename = std::string(Basename(test_file_name_));
+
+ // Open the temporary directory read-only.
+ ASSERT_THAT(dirfd = open(GetAbsoluteTestTmpdir().c_str(), O_RDONLY),
+ SyscallSucceeds());
+
+ // Check that the stat works.
+ EXPECT_THAT(fstatat(dirfd, filename.c_str(), &st, 0), SyscallSucceeds());
+ EXPECT_TRUE(S_ISREG(st.st_mode));
+ close(dirfd);
+}
+
+TEST_F(StatTest, FstatatSymlink) {
+ struct stat st;
+
+ // Check that the link is followed.
+ EXPECT_THAT(fstatat(AT_FDCWD, "/proc/self", &st, 0), SyscallSucceeds());
+ EXPECT_TRUE(S_ISDIR(st.st_mode));
+ EXPECT_FALSE(S_ISLNK(st.st_mode));
+
+ // Check that the flag works.
+ EXPECT_THAT(fstatat(AT_FDCWD, "/proc/self", &st, AT_SYMLINK_NOFOLLOW),
+ SyscallSucceeds());
+ EXPECT_TRUE(S_ISLNK(st.st_mode));
+ EXPECT_FALSE(S_ISDIR(st.st_mode));
+}
+
+TEST_F(StatTest, Nlinks) {
+ TempPath basedir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+ // Directory is initially empty, it should contain 2 links (one from itself,
+ // one from ".").
+ EXPECT_THAT(Links(basedir.path()), IsPosixErrorOkAndHolds(2));
+
+ // Create a file in the test directory. Files shouldn't increase the link
+ // count on the base directory.
+ TempPath file1 =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(basedir.path()));
+ EXPECT_THAT(Links(basedir.path()), IsPosixErrorOkAndHolds(2));
+
+ // Create subdirectories. This should increase the link count by 1 per
+ // subdirectory.
+ TempPath dir1 =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(basedir.path()));
+ EXPECT_THAT(Links(basedir.path()), IsPosixErrorOkAndHolds(3));
+ TempPath dir2 =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(basedir.path()));
+ EXPECT_THAT(Links(basedir.path()), IsPosixErrorOkAndHolds(4));
+
+ // Removing directories should reduce the link count.
+ dir1.reset();
+ EXPECT_THAT(Links(basedir.path()), IsPosixErrorOkAndHolds(3));
+ dir2.reset();
+ EXPECT_THAT(Links(basedir.path()), IsPosixErrorOkAndHolds(2));
+
+ // Removing files should have no effect on link count.
+ file1.reset();
+ EXPECT_THAT(Links(basedir.path()), IsPosixErrorOkAndHolds(2));
+}
+
+TEST_F(StatTest, BlocksIncreaseOnWrite) {
+ struct stat st;
+
+ // Stat the empty file.
+ ASSERT_THAT(fstat(test_file_fd_.get(), &st), SyscallSucceeds());
+
+ const int initial_blocks = st.st_blocks;
+
+ // Write to the file, making sure to exceed the block size.
+ std::vector<char> buf(2 * st.st_blksize, 'a');
+ ASSERT_THAT(write(test_file_fd_.get(), buf.data(), buf.size()),
+ SyscallSucceedsWithValue(buf.size()));
+
+ // Stat the file again, and verify that number of allocated blocks has
+ // increased.
+ ASSERT_THAT(fstat(test_file_fd_.get(), &st), SyscallSucceeds());
+ EXPECT_GT(st.st_blocks, initial_blocks);
+}
+
+TEST_F(StatTest, PathNotCleaned) {
+ TempPath basedir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+ // Create a file in the basedir.
+ TempPath file =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(basedir.path()));
+
+ // Stating the file directly should succeed.
+ struct stat buf;
+ EXPECT_THAT(lstat(file.path().c_str(), &buf), SyscallSucceeds());
+
+ // Try to stat the file using a directory that does not exist followed by
+ // "..". If the path is cleaned prior to stating (which it should not be)
+ // then this will succeed.
+ const std::string bad_path = JoinPath("/does_not_exist/..", file.path());
+ EXPECT_THAT(lstat(bad_path.c_str(), &buf), SyscallFailsWithErrno(ENOENT));
+}
+
+TEST_F(StatTest, PathCanContainDotDot) {
+ TempPath basedir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ TempPath subdir =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(basedir.path()));
+ const std::string subdir_name = std::string(Basename(subdir.path()));
+
+ // Create a file in the subdir.
+ TempPath file =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(subdir.path()));
+ const std::string file_name = std::string(Basename(file.path()));
+
+ // Stat the file through a path that includes '..' and '.' but still resolves
+ // to the file.
+ const std::string good_path =
+ JoinPath(basedir.path(), subdir_name, "..", subdir_name, ".", file_name);
+ struct stat buf;
+ EXPECT_THAT(lstat(good_path.c_str(), &buf), SyscallSucceeds());
+}
+
+TEST_F(StatTest, PathCanContainEmptyComponent) {
+ TempPath basedir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+ // Create a file in the basedir.
+ TempPath file =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(basedir.path()));
+ const std::string file_name = std::string(Basename(file.path()));
+
+ // Stat the file through a path that includes an empty component. We have to
+ // build this ourselves because JoinPath automatically removes empty
+ // components.
+ const std::string good_path = absl::StrCat(basedir.path(), "//", file_name);
+ struct stat buf;
+ EXPECT_THAT(lstat(good_path.c_str(), &buf), SyscallSucceeds());
+}
+
+TEST_F(StatTest, TrailingSlashNotCleanedReturnsENOTDIR) {
+ TempPath basedir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+ // Create a file in the basedir.
+ TempPath file =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(basedir.path()));
+
+ // Stat the file with an extra "/" on the end of it. Since file is not a
+ // directory, this should return ENOTDIR.
+ const std::string bad_path = absl::StrCat(file.path(), "/");
+ struct stat buf;
+ EXPECT_THAT(lstat(bad_path.c_str(), &buf), SyscallFailsWithErrno(ENOTDIR));
+}
+
+TEST_F(StatTest, LeadingDoubleSlash) {
+ // Create a file, and make sure we can stat it.
+ TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ struct stat st;
+ ASSERT_THAT(lstat(file.path().c_str(), &st), SyscallSucceeds());
+
+ // Now add an extra leading slash.
+ const std::string double_slash_path = absl::StrCat("/", file.path());
+ ASSERT_TRUE(absl::StartsWith(double_slash_path, "//"));
+
+ // We should be able to stat the new path, and it should resolve to the same
+ // file (same device and inode).
+ struct stat double_slash_st;
+ ASSERT_THAT(lstat(double_slash_path.c_str(), &double_slash_st),
+ SyscallSucceeds());
+ EXPECT_EQ(st.st_dev, double_slash_st.st_dev);
+ EXPECT_EQ(st.st_ino, double_slash_st.st_ino);
+}
+
+// Test that a rename doesn't change the underlying file.
+TEST_F(StatTest, StatDoesntChangeAfterRename) {
+ const TempPath old_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const TempPath new_path(NewTempAbsPath());
+
+ struct stat st_old = {};
+ struct stat st_new = {};
+
+ ASSERT_THAT(stat(old_dir.path().c_str(), &st_old), SyscallSucceeds());
+ ASSERT_THAT(rename(old_dir.path().c_str(), new_path.path().c_str()),
+ SyscallSucceeds());
+ ASSERT_THAT(stat(new_path.path().c_str(), &st_new), SyscallSucceeds());
+
+ EXPECT_EQ(st_old.st_nlink, st_new.st_nlink);
+ EXPECT_EQ(st_old.st_dev, st_new.st_dev);
+ EXPECT_EQ(st_old.st_ino, st_new.st_ino);
+ EXPECT_EQ(st_old.st_mode, st_new.st_mode);
+ EXPECT_EQ(st_old.st_uid, st_new.st_uid);
+ EXPECT_EQ(st_old.st_gid, st_new.st_gid);
+ EXPECT_EQ(st_old.st_size, st_new.st_size);
+}
+
+// Test link counts with a regular file as the child.
+TEST_F(StatTest, LinkCountsWithRegularFileChild) {
+ const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+ struct stat st_parent_before = {};
+ ASSERT_THAT(stat(dir.path().c_str(), &st_parent_before), SyscallSucceeds());
+ EXPECT_EQ(st_parent_before.st_nlink, 2);
+
+ // Adding a regular file doesn't adjust the parent's link count.
+ const TempPath child =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir.path()));
+
+ struct stat st_parent_after = {};
+ ASSERT_THAT(stat(dir.path().c_str(), &st_parent_after), SyscallSucceeds());
+ EXPECT_EQ(st_parent_after.st_nlink, 2);
+
+ // The child should have a single link from the parent.
+ struct stat st_child = {};
+ ASSERT_THAT(stat(child.path().c_str(), &st_child), SyscallSucceeds());
+ EXPECT_TRUE(S_ISREG(st_child.st_mode));
+ EXPECT_EQ(st_child.st_nlink, 1);
+
+ // Finally unlinking the child should not affect the parent's link count.
+ ASSERT_THAT(unlink(child.path().c_str()), SyscallSucceeds());
+ ASSERT_THAT(stat(dir.path().c_str(), &st_parent_after), SyscallSucceeds());
+ EXPECT_EQ(st_parent_after.st_nlink, 2);
+}
+
+// This test verifies that inodes remain around when there is an open fd
+// after link count hits 0.
+TEST_F(StatTest, ZeroLinksOpenFdRegularFileChild) {
+ // Setting the enviornment variable GVISOR_GOFER_UNCACHED to any value
+ // will prevent this test from running, see the tmpfs lifecycle.
+ //
+ // We need to support this because when a file is unlinked and we forward
+ // the stat to the gofer it would return ENOENT.
+ const char* uncached_gofer = getenv("GVISOR_GOFER_UNCACHED");
+ SKIP_IF(uncached_gofer != nullptr);
+
+ // We don't support saving unlinked files.
+ const DisableSave ds;
+
+ const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const TempPath child = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ dir.path(), "hello", TempPath::kDefaultFileMode));
+
+ // The child should have a single link from the parent.
+ struct stat st_child_before = {};
+ ASSERT_THAT(stat(child.path().c_str(), &st_child_before), SyscallSucceeds());
+ EXPECT_TRUE(S_ISREG(st_child_before.st_mode));
+ EXPECT_EQ(st_child_before.st_nlink, 1);
+ EXPECT_EQ(st_child_before.st_size, 5); // Hello is 5 bytes.
+
+ // Open the file so we can fstat after unlinking.
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(child.path(), O_RDONLY));
+
+ // Now a stat should return ENOENT but we should still be able to stat
+ // via the open fd and fstat.
+ ASSERT_THAT(unlink(child.path().c_str()), SyscallSucceeds());
+
+ // Since the file has no more links stat should fail.
+ struct stat st_child_after = {};
+ ASSERT_THAT(stat(child.path().c_str(), &st_child_after),
+ SyscallFailsWithErrno(ENOENT));
+
+ // Fstat should still allow us to access the same file via the fd.
+ struct stat st_child_fd = {};
+ ASSERT_THAT(fstat(fd.get(), &st_child_fd), SyscallSucceeds());
+ EXPECT_EQ(st_child_before.st_dev, st_child_fd.st_dev);
+ EXPECT_EQ(st_child_before.st_ino, st_child_fd.st_ino);
+ EXPECT_EQ(st_child_before.st_mode, st_child_fd.st_mode);
+ EXPECT_EQ(st_child_before.st_uid, st_child_fd.st_uid);
+ EXPECT_EQ(st_child_before.st_gid, st_child_fd.st_gid);
+ EXPECT_EQ(st_child_before.st_size, st_child_fd.st_size);
+
+ // TODO: This isn't ideal but since fstatfs(2) will always return
+ // OVERLAYFS_SUPER_MAGIC we have no way to know if this fs is backed by a
+ // gofer which doesn't support links.
+ EXPECT_TRUE(st_child_fd.st_nlink == 0 || st_child_fd.st_nlink == 1);
+}
+
+// Test link counts with a directory as the child.
+TEST_F(StatTest, LinkCountsWithDirChild) {
+ const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+ // Before a child is added the two links are "." and the link from the parent.
+ struct stat st_parent_before = {};
+ ASSERT_THAT(stat(dir.path().c_str(), &st_parent_before), SyscallSucceeds());
+ EXPECT_EQ(st_parent_before.st_nlink, 2);
+
+ // Create a subdirectory and stat for the parent link counts.
+ const TempPath sub_dir =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(dir.path()));
+
+ // The three links are ".", the link from the parent, and the link from
+ // the child as "..".
+ struct stat st_parent_after = {};
+ ASSERT_THAT(stat(dir.path().c_str(), &st_parent_after), SyscallSucceeds());
+ EXPECT_EQ(st_parent_after.st_nlink, 3);
+
+ // The child will have 1 link from the parent and 1 link which represents ".".
+ struct stat st_child = {};
+ ASSERT_THAT(stat(sub_dir.path().c_str(), &st_child), SyscallSucceeds());
+ EXPECT_TRUE(S_ISDIR(st_child.st_mode));
+ EXPECT_EQ(st_child.st_nlink, 2);
+
+ // Finally delete the child dir and the parent link count should return to 2.
+ ASSERT_THAT(rmdir(sub_dir.path().c_str()), SyscallSucceeds());
+ ASSERT_THAT(stat(dir.path().c_str(), &st_parent_after), SyscallSucceeds());
+
+ // Now we should only have links from the parent and "." since the subdir
+ // has been removed.
+ EXPECT_EQ(st_parent_after.st_nlink, 2);
+}
+
+// Test statting a child of a non-directory.
+TEST_F(StatTest, ChildOfNonDir) {
+ // Create a path that has a child of a regular file.
+ const std::string filename = JoinPath(test_file_name_, "child");
+
+ // Statting the path should return ENOTDIR.
+ struct stat st;
+ EXPECT_THAT(lstat(filename.c_str(), &st), SyscallFailsWithErrno(ENOTDIR));
+}
+
+// Verify that we get an ELOOP from too many symbolic links even when there
+// are directories in the middle.
+TEST_F(StatTest, LstatELOOPPath) {
+ const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ std::string subdir_base = "subdir";
+ ASSERT_THAT(mkdir(JoinPath(dir.path(), subdir_base).c_str(), 0755),
+ SyscallSucceeds());
+
+ std::string target = JoinPath(dir.path(), subdir_base, subdir_base);
+ std::string dst = JoinPath("..", subdir_base);
+ ASSERT_THAT(symlink(dst.c_str(), target.c_str()), SyscallSucceeds());
+ auto cleanup = Cleanup(
+ [&target]() { EXPECT_THAT(unlink(target.c_str()), SyscallSucceeds()); });
+
+ // Now build a path which is /subdir/subdir/... repeated many times so that
+ // we can build a path that is shorter than PATH_MAX but can still cause
+ // too many symbolic links. Note: Every other subdir is actually a directory
+ // so we're not in a situation where it's a -> b -> a -> b, where a and b
+ // are symbolic links.
+ std::string path = dir.path();
+ std::string subdir_append = absl::StrCat("/", subdir_base);
+ do {
+ absl::StrAppend(&path, subdir_append);
+ // Keep appending /subdir until we would overflow PATH_MAX.
+ } while ((path.size() + subdir_append.size()) < PATH_MAX);
+
+ struct stat s = {};
+ ASSERT_THAT(lstat(path.c_str(), &s), SyscallFailsWithErrno(ELOOP));
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/stat_times.cc b/test/syscalls/linux/stat_times.cc
new file mode 100644
index 000000000..442957c65
--- /dev/null
+++ b/test/syscalls/linux/stat_times.cc
@@ -0,0 +1,220 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <sys/stat.h>
+
+#include <tuple>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+using ::testing::IsEmpty;
+using ::testing::Not;
+
+class StatTimesTest : public ::testing::Test {
+ protected:
+ std::tuple<absl::Time, absl::Time, absl::Time> GetTime(const TempPath& file) {
+ struct stat statbuf = {};
+ EXPECT_THAT(stat(file.path().c_str(), &statbuf), SyscallSucceeds());
+
+ const auto atime = absl::TimeFromTimespec(statbuf.st_atim);
+ const auto mtime = absl::TimeFromTimespec(statbuf.st_mtim);
+ const auto ctime = absl::TimeFromTimespec(statbuf.st_ctim);
+ return std::make_tuple(atime, mtime, ctime);
+ }
+};
+
+TEST_F(StatTimesTest, FileCreationTimes) {
+ const DisableSave ds; // Timing-related test.
+
+ // Get a time for when the file is created.
+ const absl::Time before = absl::Now() - absl::Seconds(1);
+ const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ const absl::Time after = absl::Now() + absl::Seconds(1);
+
+ absl::Time atime, mtime, ctime;
+ std::tie(atime, mtime, ctime) = GetTime(file);
+
+ EXPECT_LE(before, atime);
+ EXPECT_LE(before, mtime);
+ EXPECT_LE(before, ctime);
+ EXPECT_GE(after, atime);
+ EXPECT_GE(after, mtime);
+ EXPECT_GE(after, ctime);
+}
+
+TEST_F(StatTimesTest, FileCtimeChanges) {
+ auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+ MaybeSave(); // FIXME: ctime is inconsistent.
+
+ absl::Time atime, mtime, ctime;
+ std::tie(atime, mtime, ctime) = GetTime(file);
+
+ absl::SleepFor(absl::Seconds(1));
+
+ // Chmod should only change ctime.
+ EXPECT_THAT(chmod(file.path().c_str(), 0666), SyscallSucceeds());
+
+ absl::Time atime2, mtime2, ctime2;
+ std::tie(atime2, mtime2, ctime2) = GetTime(file);
+ EXPECT_EQ(atime2, atime);
+ EXPECT_EQ(mtime2, mtime);
+ EXPECT_GT(ctime2, ctime);
+
+ absl::SleepFor(absl::Seconds(1));
+
+ // Rename should only change ctime.
+ const auto newpath = NewTempAbsPath();
+ EXPECT_THAT(rename(file.path().c_str(), newpath.c_str()), SyscallSucceeds());
+ file.reset(newpath);
+
+ std::tie(atime, mtime, ctime) = GetTime(file);
+ EXPECT_EQ(atime, atime2);
+ EXPECT_EQ(mtime, mtime2);
+ EXPECT_GT(ctime, ctime2);
+
+ absl::SleepFor(absl::Seconds(1));
+
+ // Utimes should only change ctime and the time that we ask to change (atime
+ // to now in this case).
+ const absl::Time before = absl::Now() - absl::Seconds(1);
+ const struct timespec ts[2] = {{0, UTIME_NOW}, {0, UTIME_OMIT}};
+ ASSERT_THAT(utimensat(AT_FDCWD, file.path().c_str(), ts, 0),
+ SyscallSucceeds());
+ const absl::Time after = absl::Now() + absl::Seconds(1);
+
+ std::tie(atime2, mtime2, ctime2) = GetTime(file);
+ EXPECT_LE(before, atime2);
+ EXPECT_GE(after, atime2);
+ EXPECT_EQ(mtime2, mtime);
+ EXPECT_GT(ctime2, ctime);
+}
+
+TEST_F(StatTimesTest, FileMtimeChanges) {
+ const auto file = ASSERT_NO_ERRNO_AND_VALUE(
+ TempPath::CreateFileWith(GetAbsoluteTestTmpdir(), "yaaass", 0666));
+
+ absl::Time atime, mtime, ctime;
+ std::tie(atime, mtime, ctime) = GetTime(file);
+
+ absl::SleepFor(absl::Seconds(1));
+
+ // Truncate should only change mtime and ctime.
+ EXPECT_THAT(truncate(file.path().c_str(), 0), SyscallSucceeds());
+
+ absl::Time atime2, mtime2, ctime2;
+ std::tie(atime2, mtime2, ctime2) = GetTime(file);
+ EXPECT_EQ(atime2, atime);
+ EXPECT_GT(mtime2, mtime);
+ EXPECT_GT(ctime2, ctime);
+
+ absl::SleepFor(absl::Seconds(1));
+
+ // Write should only change mtime and ctime.
+ const auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR, 0));
+ const std::string contents = "all the single dollars";
+ EXPECT_THAT(write(fd.get(), contents.data(), contents.size()),
+ SyscallSucceeds());
+
+ std::tie(atime, mtime, ctime) = GetTime(file);
+ EXPECT_EQ(atime, atime2);
+ EXPECT_GT(mtime, mtime2);
+ EXPECT_GT(ctime, ctime2);
+}
+
+TEST_F(StatTimesTest, FileAtimeChanges) {
+ const std::string contents = "bills bills bills";
+ const auto file = ASSERT_NO_ERRNO_AND_VALUE(
+ TempPath::CreateFileWith(GetAbsoluteTestTmpdir(), contents, 0666));
+
+ MaybeSave(); // FIXME: ctime is inconsistent.
+
+ absl::Time atime, mtime, ctime;
+ std::tie(atime, mtime, ctime) = GetTime(file);
+
+ absl::SleepFor(absl::Seconds(1));
+
+ const auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY, 0));
+
+ // Read should only change atime.
+ char buf[20];
+ const absl::Time before = absl::Now() - absl::Seconds(1);
+ int read_result;
+ ASSERT_THAT(read_result = read(fd.get(), buf, sizeof(buf)),
+ SyscallSucceeds());
+ const absl::Time after = absl::Now() + absl::Seconds(1);
+
+ EXPECT_EQ(std::string(buf, read_result), contents);
+
+ absl::Time atime2, mtime2, ctime2;
+ std::tie(atime2, mtime2, ctime2) = GetTime(file);
+
+ EXPECT_LE(before, atime2);
+ EXPECT_GE(after, atime2);
+ EXPECT_GT(atime2, atime);
+ EXPECT_EQ(mtime2, mtime);
+ EXPECT_EQ(ctime2, ctime);
+}
+
+TEST_F(StatTimesTest, DirAtimeChanges) {
+ const auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const auto file =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir.path()));
+
+ MaybeSave(); // FIXME: ctime is inconsistent.
+
+ absl::Time atime, mtime, ctime;
+ std::tie(atime, mtime, ctime) = GetTime(dir);
+
+ absl::SleepFor(absl::Seconds(1));
+
+ const absl::Time before = absl::Now() - absl::Seconds(1);
+
+ // NOTE: Keep an fd open. This ensures that the inode backing the
+ // directory won't be destroyed before the final GetTime to avoid writing out
+ // timestamps and causing side effects.
+ const auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_RDONLY, 0));
+
+ // Listing the directory contents should only change atime.
+ auto contents = ASSERT_NO_ERRNO_AND_VALUE(ListDir(dir.path(), false));
+ EXPECT_THAT(contents, Not(IsEmpty()));
+
+ const absl::Time after = absl::Now() + absl::Seconds(1);
+
+ absl::Time atime2, mtime2, ctime2;
+ std::tie(atime2, mtime2, ctime2) = GetTime(dir);
+
+ EXPECT_LE(before, atime2);
+ EXPECT_GE(after, atime2);
+ EXPECT_GT(atime2, atime);
+ EXPECT_EQ(mtime2, mtime);
+ EXPECT_EQ(ctime2, ctime);
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/statfs.cc b/test/syscalls/linux/statfs.cc
new file mode 100644
index 000000000..1fc9758c9
--- /dev/null
+++ b/test/syscalls/linux/statfs.cc
@@ -0,0 +1,81 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <sys/statfs.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(StatfsTest, CannotStatBadPath) {
+ auto temp_file = NewTempAbsPathInDir("/tmp");
+
+ struct statfs st;
+ EXPECT_THAT(statfs(temp_file.c_str(), &st), SyscallFailsWithErrno(ENOENT));
+}
+
+TEST(StatfsTest, InternalTmpfs) {
+ auto temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+
+ struct statfs st;
+ EXPECT_THAT(statfs(temp_file.path().c_str(), &st), SyscallSucceeds());
+}
+
+TEST(StatfsTest, InternalDevShm) {
+ struct statfs st;
+ EXPECT_THAT(statfs("/dev/shm", &st), SyscallSucceeds());
+}
+
+TEST(StatfsTest, NameLen) {
+ struct statfs st;
+ EXPECT_THAT(statfs("/dev/shm", &st), SyscallSucceeds());
+
+ EXPECT_EQ(st.f_namelen, NAME_MAX);
+}
+
+TEST(FstatfsTest, CannotStatBadFd) {
+ struct statfs st;
+ EXPECT_THAT(fstatfs(-1, &st), SyscallFailsWithErrno(EBADF));
+}
+
+TEST(FstatfsTest, InternalTmpfs) {
+ auto temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(temp_file.path(), O_RDONLY));
+
+ struct statfs st;
+ EXPECT_THAT(fstatfs(fd.get(), &st), SyscallSucceeds());
+}
+
+TEST(FstatfsTest, InternalDevShm) {
+ auto temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/shm", O_RDONLY));
+
+ struct statfs st;
+ EXPECT_THAT(fstatfs(fd.get(), &st), SyscallSucceeds());
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/sticky.cc b/test/syscalls/linux/sticky.cc
new file mode 100644
index 000000000..563763d10
--- /dev/null
+++ b/test/syscalls/linux/sticky.cc
@@ -0,0 +1,116 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <grp.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "test/util/capability_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+DEFINE_int32(scratch_uid1, 65534, "first scratch UID");
+DEFINE_int32(scratch_gid, 65534, "first scratch GID");
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(StickyTest, StickyBitPermDenied) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SETUID)));
+
+ auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ EXPECT_THAT(chmod(dir.path().c_str(), 0777 | S_ISVTX), SyscallSucceeds());
+ std::string path = JoinPath(dir.path(), "NewDir");
+ ASSERT_THAT(mkdir(path.c_str(), 0755), SyscallSucceeds());
+
+ // Drop privileges and change IDs only in child thread, or else this parent
+ // thread won't be able to open some log files after the test ends.
+ ScopedThread([&] {
+ // Drop privileges.
+ if (HaveCapability(CAP_FOWNER).ValueOrDie()) {
+ EXPECT_NO_ERRNO(SetCapability(CAP_FOWNER, false));
+ }
+
+ // Change EUID and EGID.
+ EXPECT_THAT(syscall(SYS_setresgid, -1, FLAGS_scratch_gid, -1),
+ SyscallSucceeds());
+ EXPECT_THAT(syscall(SYS_setresuid, -1, FLAGS_scratch_uid1, -1),
+ SyscallSucceeds());
+
+ EXPECT_THAT(rmdir(path.c_str()), SyscallFailsWithErrno(EPERM));
+ });
+}
+
+TEST(StickyTest, StickyBitSameUID) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SETUID)));
+
+ auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ EXPECT_THAT(chmod(dir.path().c_str(), 0777 | S_ISVTX), SyscallSucceeds());
+ std::string path = JoinPath(dir.path(), "NewDir");
+ ASSERT_THAT(mkdir(path.c_str(), 0755), SyscallSucceeds());
+
+ // Drop privileges and change IDs only in child thread, or else this parent
+ // thread won't be able to open some log files after the test ends.
+ ScopedThread([&] {
+ // Drop privileges.
+ if (HaveCapability(CAP_FOWNER).ValueOrDie()) {
+ EXPECT_NO_ERRNO(SetCapability(CAP_FOWNER, false));
+ }
+
+ // Change EGID.
+ EXPECT_THAT(syscall(SYS_setresgid, -1, FLAGS_scratch_gid, -1),
+ SyscallSucceeds());
+
+ // We still have the same EUID.
+ EXPECT_THAT(rmdir(path.c_str()), SyscallSucceeds());
+ });
+}
+
+TEST(StickyTest, StickyBitCapFOWNER) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SETUID)));
+
+ auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ EXPECT_THAT(chmod(dir.path().c_str(), 0777 | S_ISVTX), SyscallSucceeds());
+ std::string path = JoinPath(dir.path(), "NewDir");
+ ASSERT_THAT(mkdir(path.c_str(), 0755), SyscallSucceeds());
+
+ // Drop privileges and change IDs only in child thread, or else this parent
+ // thread won't be able to open some log files after the test ends.
+ ScopedThread([&] {
+ // Set PR_SET_KEEPCAPS.
+ EXPECT_THAT(prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0), SyscallSucceeds());
+
+ // Change EUID and EGID.
+ EXPECT_THAT(syscall(SYS_setresgid, -1, FLAGS_scratch_gid, -1),
+ SyscallSucceeds());
+ EXPECT_THAT(syscall(SYS_setresuid, -1, FLAGS_scratch_uid1, -1),
+ SyscallSucceeds());
+
+ EXPECT_NO_ERRNO(SetCapability(CAP_FOWNER, true));
+ EXPECT_THAT(rmdir(path.c_str()), SyscallSucceeds());
+ });
+}
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/symlink.cc b/test/syscalls/linux/symlink.cc
new file mode 100644
index 000000000..cfc87bc8f
--- /dev/null
+++ b/test/syscalls/linux/symlink.cc
@@ -0,0 +1,288 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <string>
+
+#include "gtest/gtest.h"
+#include "test/util/capability_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+mode_t FilePermission(const std::string& path) {
+ struct stat buf = {0};
+ TEST_CHECK(lstat(path.c_str(), &buf) == 0);
+ return buf.st_mode & 0777;
+}
+
+// Test that name collisions are checked on the new link path, not the source
+// path.
+TEST(SymlinkTest, CanCreateSymlinkWithCachedSourceDirent) {
+ const std::string srcname = NewTempAbsPath();
+ const std::string newname = NewTempAbsPath();
+ const std::string basedir = std::string(Dirname(srcname));
+ ASSERT_EQ(basedir, Dirname(newname));
+
+ ASSERT_THAT(chdir(basedir.c_str()), SyscallSucceeds());
+
+ // Open the source node to cause the underlying dirent to be cached. It will
+ // remain cached while we have the file open.
+ int fd;
+ ASSERT_THAT(fd = open(srcname.c_str(), O_CREAT | O_RDWR, 0666),
+ SyscallSucceeds());
+ FileDescriptor fd_closer(fd);
+
+ // Attempt to create a symlink. If the bug exists, this will fail since the
+ // dirent link creation code will check for a name collision on the source
+ // link name.
+ EXPECT_THAT(symlink(std::string(Basename(srcname)).c_str(),
+ std::string(Basename(newname)).c_str()),
+ SyscallSucceeds());
+}
+
+TEST(SymlinkTest, CanCreateSymlinkFile) {
+ const std::string oldname = NewTempAbsPath();
+ const std::string newname = NewTempAbsPath();
+
+ int fd;
+ ASSERT_THAT(fd = open(oldname.c_str(), O_CREAT | O_RDWR, 0666),
+ SyscallSucceeds());
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+
+ EXPECT_THAT(symlink(oldname.c_str(), newname.c_str()), SyscallSucceeds());
+ EXPECT_EQ(FilePermission(newname), 0777);
+
+ auto link = ASSERT_NO_ERRNO_AND_VALUE(ReadLink(newname));
+ EXPECT_EQ(oldname, link);
+
+ EXPECT_THAT(unlink(newname.c_str()), SyscallSucceeds());
+ EXPECT_THAT(unlink(oldname.c_str()), SyscallSucceeds());
+}
+
+TEST(SymlinkTest, CanCreateSymlinkDir) {
+ const std::string olddir = NewTempAbsPath();
+ const std::string newdir = NewTempAbsPath();
+
+ EXPECT_THAT(mkdir(olddir.c_str(), 0777), SyscallSucceeds());
+ EXPECT_THAT(symlink(olddir.c_str(), newdir.c_str()), SyscallSucceeds());
+ EXPECT_EQ(FilePermission(newdir), 0777);
+
+ auto link = ASSERT_NO_ERRNO_AND_VALUE(ReadLink(newdir));
+ EXPECT_EQ(olddir, link);
+
+ EXPECT_THAT(unlink(newdir.c_str()), SyscallSucceeds());
+
+ ASSERT_THAT(rmdir(olddir.c_str()), SyscallSucceeds());
+}
+
+TEST(SymlinkTest, CannotCreateSymlinkInReadOnlyDir) {
+ // Drop capabilities that allow us to override file and directory permissions.
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+ const std::string olddir = NewTempAbsPath();
+ ASSERT_THAT(mkdir(olddir.c_str(), 0444), SyscallSucceeds());
+
+ const std::string newdir = NewTempAbsPathInDir(olddir);
+ EXPECT_THAT(symlink(olddir.c_str(), newdir.c_str()),
+ SyscallFailsWithErrno(EACCES));
+
+ ASSERT_THAT(rmdir(olddir.c_str()), SyscallSucceeds());
+}
+
+TEST(SymlinkTest, CannotSymlinkOverExistingFile) {
+ const std::string oldname = NewTempAbsPath();
+ const std::string newname = NewTempAbsPath();
+
+ int oldfd;
+ int newfd;
+ ASSERT_THAT(oldfd = open(oldname.c_str(), O_CREAT | O_RDWR, 0666),
+ SyscallSucceeds());
+ EXPECT_THAT(close(oldfd), SyscallSucceeds());
+ ASSERT_THAT(newfd = open(newname.c_str(), O_CREAT | O_RDWR, 0666),
+ SyscallSucceeds());
+ EXPECT_THAT(close(newfd), SyscallSucceeds());
+
+ EXPECT_THAT(symlink(oldname.c_str(), newname.c_str()),
+ SyscallFailsWithErrno(EEXIST));
+
+ EXPECT_THAT(unlink(oldname.c_str()), SyscallSucceeds());
+ EXPECT_THAT(unlink(newname.c_str()), SyscallSucceeds());
+}
+
+TEST(SymlinkTest, OldnameIsEmpty) {
+ const std::string newname = NewTempAbsPath();
+ EXPECT_THAT(symlink("", newname.c_str()), SyscallFailsWithErrno(ENOENT));
+}
+
+TEST(SymlinkTest, OldnameIsDangling) {
+ const std::string newname = NewTempAbsPath();
+ EXPECT_THAT(symlink("/dangling", newname.c_str()), SyscallSucceeds());
+
+ // This is required for S/R random save tests, which pre-run this test
+ // in the same TEST_TMPDIR, which means that we need to clean it for any
+ // operations exclusively creating files, like symlink above.
+ EXPECT_THAT(unlink(newname.c_str()), SyscallSucceeds());
+}
+
+TEST(SymlinkTest, NewnameCannotExist) {
+ const std::string newname =
+ JoinPath(GetAbsoluteTestTmpdir(), "thisdoesnotexist", "foo");
+ EXPECT_THAT(symlink("/thisdoesnotmatter", newname.c_str()),
+ SyscallFailsWithErrno(ENOENT));
+}
+
+TEST(SymlinkTest, CanEvaluateLink) {
+ const std::string oldname = NewTempAbsPath();
+ const std::string newname = NewTempAbsPath();
+
+ int fd;
+ ASSERT_THAT(fd = open(oldname.c_str(), O_CREAT | O_RDWR, 0666),
+ SyscallSucceeds());
+ struct stat old;
+ EXPECT_THAT(fstat(fd, &old), SyscallSucceeds());
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+
+ EXPECT_THAT(symlink(oldname.c_str(), newname.c_str()), SyscallSucceeds());
+ EXPECT_EQ(FilePermission(newname), 0777);
+
+ EXPECT_THAT(fd = open(newname.c_str(), O_RDWR, 0666), SyscallSucceeds());
+ struct stat old_linked;
+ EXPECT_THAT(fstat(fd, &old_linked), SyscallSucceeds());
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+
+ // Check that in fact newname points to the file we expect.
+ // FIXME: use only inodes here once they are consistent,
+ // but this is better than nothing.
+ EXPECT_EQ(old.st_dev, old_linked.st_dev);
+ EXPECT_EQ(old.st_mode, old_linked.st_mode);
+ EXPECT_EQ(old.st_size, old_linked.st_size);
+
+ EXPECT_THAT(unlink(newname.c_str()), SyscallSucceeds());
+ EXPECT_THAT(unlink(oldname.c_str()), SyscallSucceeds());
+}
+
+TEST(SymlinkTest, TargetIsNotMapped) {
+ const std::string oldname = NewTempAbsPath();
+ const std::string newname = NewTempAbsPath();
+
+ int fd;
+ // Create the target so that when we read the link, it exists.
+ ASSERT_THAT(fd = open(oldname.c_str(), O_CREAT | O_RDWR, 0666),
+ SyscallSucceeds());
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+
+ // Create a symlink called newname that points to oldname.
+ EXPECT_THAT(symlink(oldname.c_str(), newname.c_str()), SyscallSucceeds());
+
+ std::vector<char> buf(1024);
+ int linksize;
+ // Read the link and assert that the oldname is still the same.
+ EXPECT_THAT(linksize = readlink(newname.c_str(), buf.data(), 1024),
+ SyscallSucceeds());
+ EXPECT_EQ(0, strncmp(oldname.c_str(), buf.data(), linksize));
+
+ EXPECT_THAT(unlink(newname.c_str()), SyscallSucceeds());
+ EXPECT_THAT(unlink(oldname.c_str()), SyscallSucceeds());
+}
+
+TEST(SymlinkTest, PreadFromSymlink) {
+ std::string name = NewTempAbsPath();
+ int fd;
+ ASSERT_THAT(fd = open(name.c_str(), O_CREAT, 0644), SyscallSucceeds());
+ ASSERT_THAT(close(fd), SyscallSucceeds());
+
+ std::string linkname = NewTempAbsPath();
+ ASSERT_THAT(symlink(name.c_str(), linkname.c_str()), SyscallSucceeds());
+
+ ASSERT_THAT(fd = open(linkname.c_str(), O_RDONLY), SyscallSucceeds());
+
+ char buf[1024];
+ EXPECT_THAT(pread64(fd, buf, 1024, 0), SyscallSucceeds());
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+
+ EXPECT_THAT(unlink(name.c_str()), SyscallSucceeds());
+ EXPECT_THAT(unlink(linkname.c_str()), SyscallSucceeds());
+}
+
+TEST(SymlinkTest, SymlinkAtDegradedPermissions_NoRandomSave) {
+ // Drop capabilities that allow us to override file and directory permissions.
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+ auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir.path()));
+
+ int dirfd;
+ ASSERT_THAT(dirfd = open(dir.path().c_str(), O_DIRECTORY, 0),
+ SyscallSucceeds());
+
+ const DisableSave ds; // Permissions are dropped.
+ EXPECT_THAT(fchmod(dirfd, 0), SyscallSucceeds());
+
+ std::string basename = std::string(Basename(file.path()));
+ EXPECT_THAT(symlinkat("/dangling", dirfd, basename.c_str()),
+ SyscallFailsWithErrno(EACCES));
+ EXPECT_THAT(close(dirfd), SyscallSucceeds());
+}
+
+TEST(SymlinkTest, ReadlinkAtDegradedPermissions_NoRandomSave) {
+ // Drop capabilities that allow us to override file and directory permissions.
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+ auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const std::string oldpath = NewTempAbsPathInDir(dir.path());
+ const std::string oldbase = std::string(Basename(oldpath));
+ ASSERT_THAT(symlink("/dangling", oldpath.c_str()), SyscallSucceeds());
+
+ int dirfd;
+ EXPECT_THAT(dirfd = open(dir.path().c_str(), O_DIRECTORY, 0),
+ SyscallSucceeds());
+
+ const DisableSave ds; // Permissions are dropped.
+ EXPECT_THAT(fchmod(dirfd, 0), SyscallSucceeds());
+
+ char buf[1024];
+ int linksize;
+ EXPECT_THAT(linksize = readlinkat(dirfd, oldbase.c_str(), buf, 1024),
+ SyscallFailsWithErrno(EACCES));
+ EXPECT_THAT(close(dirfd), SyscallSucceeds());
+}
+
+TEST(SymlinkTest, ChmodSymlink) {
+ auto target = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ const std::string newpath = NewTempAbsPath();
+ ASSERT_THAT(symlink(target.path().c_str(), newpath.c_str()),
+ SyscallSucceeds());
+ EXPECT_EQ(FilePermission(newpath), 0777);
+ EXPECT_THAT(chmod(newpath.c_str(), 0666), SyscallSucceeds());
+ EXPECT_EQ(FilePermission(newpath), 0777);
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/sync.cc b/test/syscalls/linux/sync.cc
new file mode 100644
index 000000000..5b777b6eb
--- /dev/null
+++ b/test/syscalls/linux/sync.cc
@@ -0,0 +1,60 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <string>
+
+#include "gtest/gtest.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(SyncTest, SyncEverything) {
+ ASSERT_THAT(syscall(SYS_sync), SyscallSucceeds());
+}
+
+TEST(SyncTest, SyncFileSytem) {
+ int fd;
+ auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ ASSERT_THAT(fd = open(f.path().c_str(), O_RDONLY), SyscallSucceeds());
+ EXPECT_THAT(syncfs(fd), SyscallSucceeds());
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+}
+
+TEST(SyncTest, SyncFromPipe) {
+ int pipes[2];
+ EXPECT_THAT(pipe(pipes), SyscallSucceeds());
+ EXPECT_THAT(syncfs(pipes[0]), SyscallSucceeds());
+ EXPECT_THAT(syncfs(pipes[1]), SyscallSucceeds());
+ EXPECT_THAT(close(pipes[0]), SyscallSucceeds());
+ EXPECT_THAT(close(pipes[1]), SyscallSucceeds());
+}
+
+TEST(SyncTest, CannotSyncFileSytemAtBadFd) {
+ EXPECT_THAT(syncfs(-1), SyscallFailsWithErrno(EBADF));
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/sync_file_range.cc b/test/syscalls/linux/sync_file_range.cc
new file mode 100644
index 000000000..ebe4ca171
--- /dev/null
+++ b/test/syscalls/linux/sync_file_range.cc
@@ -0,0 +1,111 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <string>
+
+#include "gtest/gtest.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(SyncFileRangeTest, TempFileSucceeds) {
+ auto tmpfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ auto f = ASSERT_NO_ERRNO_AND_VALUE(Open(tmpfile.path(), O_RDWR));
+ constexpr char data[] = "some data to sync";
+ int fd = f.get();
+
+ EXPECT_THAT(write(fd, data, sizeof(data)),
+ SyscallSucceedsWithValue(sizeof(data)));
+ EXPECT_THAT(sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WRITE),
+ SyscallSucceeds());
+ EXPECT_THAT(sync_file_range(fd, 0, 0, 0), SyscallSucceeds());
+ EXPECT_THAT(
+ sync_file_range(fd, 0, 0,
+ SYNC_FILE_RANGE_WRITE | SYNC_FILE_RANGE_WAIT_AFTER |
+ SYNC_FILE_RANGE_WAIT_BEFORE),
+ SyscallSucceeds());
+ EXPECT_THAT(sync_file_range(
+ fd, 0, 1, SYNC_FILE_RANGE_WRITE | SYNC_FILE_RANGE_WAIT_AFTER),
+ SyscallSucceeds());
+ EXPECT_THAT(sync_file_range(
+ fd, 1, 0, SYNC_FILE_RANGE_WRITE | SYNC_FILE_RANGE_WAIT_AFTER),
+ SyscallSucceeds());
+}
+
+TEST(SyncFileRangeTest, CannotSyncFileRangeOnUnopenedFd) {
+ auto tmpfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ auto f = ASSERT_NO_ERRNO_AND_VALUE(Open(tmpfile.path(), O_RDWR));
+ constexpr char data[] = "some data to sync";
+ int fd = f.get();
+
+ EXPECT_THAT(write(fd, data, sizeof(data)),
+ SyscallSucceedsWithValue(sizeof(data)));
+
+ pid_t pid = fork();
+ if (pid == 0) {
+ f.reset();
+
+ // fd is now invalid.
+ TEST_CHECK(sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WRITE) == -1);
+ TEST_PCHECK(errno == EBADF);
+ _exit(0);
+ }
+
+ int status = 0;
+ ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+ EXPECT_TRUE(WIFEXITED(status));
+ EXPECT_EQ(WEXITSTATUS(status), 0);
+}
+
+TEST(SyncFileRangeTest, BadArgs) {
+ auto tmpfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ auto f = ASSERT_NO_ERRNO_AND_VALUE(Open(tmpfile.path(), O_RDWR));
+ int fd = f.get();
+
+ EXPECT_THAT(sync_file_range(fd, -1, 0, 0), SyscallFailsWithErrno(EINVAL));
+ EXPECT_THAT(sync_file_range(fd, 0, -1, 0), SyscallFailsWithErrno(EINVAL));
+ EXPECT_THAT(sync_file_range(fd, 8912, INT64_MAX - 4096, 0),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(SyncFileRangeTest, CannotSyncFileRangeWithWaitBefore) {
+ auto tmpfile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ auto f = ASSERT_NO_ERRNO_AND_VALUE(Open(tmpfile.path(), O_RDWR));
+ constexpr char data[] = "some data to sync";
+ int fd = f.get();
+
+ EXPECT_THAT(write(fd, data, sizeof(data)),
+ SyscallSucceedsWithValue(sizeof(data)));
+ if (IsRunningOnGvisor()) {
+ EXPECT_THAT(sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WAIT_BEFORE),
+ SyscallFailsWithErrno(ENOSYS));
+ EXPECT_THAT(
+ sync_file_range(fd, 0, 0,
+ SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE),
+ SyscallFailsWithErrno(ENOSYS));
+ }
+}
+
+} // namespace
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/sysinfo.cc b/test/syscalls/linux/sysinfo.cc
new file mode 100644
index 000000000..a0dd82640
--- /dev/null
+++ b/test/syscalls/linux/sysinfo.cc
@@ -0,0 +1,86 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This is a very simple sanity test to validate that the sysinfo syscall is
+// supported by gvisor and returns sane values.
+#include <sys/syscall.h>
+#include <sys/sysinfo.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(SysinfoTest, SysinfoIsCallable) {
+ struct sysinfo ignored = {};
+ EXPECT_THAT(syscall(SYS_sysinfo, &ignored), SyscallSucceedsWithValue(0));
+}
+
+TEST(SysinfoTest, EfaultProducedOnBadAddress) {
+ // Validate that we return EFAULT when a bad address is provided.
+ // specified by man 2 sysinfo
+ EXPECT_THAT(syscall(SYS_sysinfo, nullptr), SyscallFailsWithErrno(EFAULT));
+}
+
+TEST(SysinfoTest, TotalRamSaneValue) {
+ struct sysinfo s = {};
+ EXPECT_THAT(sysinfo(&s), SyscallSucceedsWithValue(0));
+ EXPECT_GT(s.totalram, 0);
+}
+
+TEST(SysinfoTest, MemunitSet) {
+ struct sysinfo s = {};
+ EXPECT_THAT(sysinfo(&s), SyscallSucceedsWithValue(0));
+ EXPECT_GE(s.mem_unit, 1);
+}
+
+TEST(SysinfoTest, UptimeSaneValue) {
+ struct sysinfo s = {};
+ EXPECT_THAT(sysinfo(&s), SyscallSucceedsWithValue(0));
+ EXPECT_GE(s.uptime, 0);
+}
+
+TEST(SysinfoTest, UptimeIncreasingValue) {
+ struct sysinfo s = {};
+ EXPECT_THAT(sysinfo(&s), SyscallSucceedsWithValue(0));
+ absl::SleepFor(absl::Seconds(2));
+ struct sysinfo s2 = {};
+ EXPECT_THAT(sysinfo(&s2), SyscallSucceedsWithValue(0));
+ EXPECT_LT(s.uptime, s2.uptime);
+}
+
+TEST(SysinfoTest, FreeRamSaneValue) {
+ struct sysinfo s = {};
+ EXPECT_THAT(sysinfo(&s), SyscallSucceedsWithValue(0));
+ EXPECT_GT(s.freeram, 0);
+ EXPECT_LT(s.freeram, s.totalram);
+}
+
+TEST(SysinfoTest, NumProcsSaneValue) {
+ struct sysinfo s = {};
+ EXPECT_THAT(sysinfo(&s), SyscallSucceedsWithValue(0));
+ EXPECT_GT(s.procs, 0);
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/syslog.cc b/test/syscalls/linux/syslog.cc
new file mode 100644
index 000000000..5bd0d1cc3
--- /dev/null
+++ b/test/syscalls/linux/syslog.cc
@@ -0,0 +1,51 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/klog.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+constexpr int SYSLOG_ACTION_READ_ALL = 3;
+constexpr int SYSLOG_ACTION_SIZE_BUFFER = 10;
+
+int Syslog(int type, char* buf, int len) {
+ return syscall(__NR_syslog, type, buf, len);
+}
+
+// Only SYSLOG_ACTION_SIZE_BUFFER and SYSLOG_ACTION_READ_ALL are implemented in
+// gVisor.
+
+TEST(Syslog, Size) {
+ EXPECT_THAT(Syslog(SYSLOG_ACTION_SIZE_BUFFER, nullptr, 0), SyscallSucceeds());
+}
+
+TEST(Syslog, ReadAll) {
+ // There might not be anything to read, so we can't check the write count.
+ char buf[100];
+ EXPECT_THAT(Syslog(SYSLOG_ACTION_READ_ALL, buf, sizeof(buf)),
+ SyscallSucceeds());
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/sysret.cc b/test/syscalls/linux/sysret.cc
new file mode 100644
index 000000000..8e10220eb
--- /dev/null
+++ b/test/syscalls/linux/sysret.cc
@@ -0,0 +1,113 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Tests to verify that the behavior of linux and gvisor matches when
+// 'sysret' returns to bad (aka non-canonical) %rip or %rsp.
+#include <sys/ptrace.h>
+#include <sys/user.h>
+
+#include "gtest/gtest.h"
+#include "test/util/logging.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+constexpr uint64_t kNonCanonicalRip = 0xCCCC000000000000;
+constexpr uint64_t kNonCanonicalRsp = 0xFFFF000000000000;
+
+class SysretTest : public ::testing::Test {
+ protected:
+ struct user_regs_struct regs_;
+ pid_t child_;
+
+ void SetUp() override {
+ pid_t pid = fork();
+
+ // Child.
+ if (pid == 0) {
+ TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, 0, 0) == 0);
+ MaybeSave();
+ TEST_PCHECK(raise(SIGSTOP) == 0);
+ MaybeSave();
+ _exit(0);
+ }
+
+ // Parent.
+ int status;
+ ASSERT_THAT(pid, SyscallSucceeds()); // Might still be < 0.
+ ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+ EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
+ ASSERT_THAT(ptrace(PTRACE_GETREGS, pid, 0, &regs_), SyscallSucceeds());
+
+ child_ = pid;
+ }
+
+ void Detach() {
+ ASSERT_THAT(ptrace(PTRACE_DETACH, child_, 0, 0), SyscallSucceeds());
+ }
+
+ void SetRip(uint64_t newrip) {
+ regs_.rip = newrip;
+ ASSERT_THAT(ptrace(PTRACE_SETREGS, child_, 0, &regs_), SyscallSucceeds());
+ }
+
+ void SetRsp(uint64_t newrsp) {
+ regs_.rsp = newrsp;
+ ASSERT_THAT(ptrace(PTRACE_SETREGS, child_, 0, &regs_), SyscallSucceeds());
+ }
+
+ // Wait waits for the child pid and returns the exit status.
+ int Wait() {
+ int status;
+ while (true) {
+ int rval = wait4(child_, &status, 0, NULL);
+ if (rval < 0) {
+ return rval;
+ }
+ if (rval == child_) {
+ return status;
+ }
+ }
+ }
+};
+
+TEST_F(SysretTest, JustDetach) {
+ Detach();
+ int status = Wait();
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << "status = " << status;
+}
+
+TEST_F(SysretTest, BadRip) {
+ SetRip(kNonCanonicalRip);
+ Detach();
+ int status = Wait();
+ EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGSEGV)
+ << "status = " << status;
+}
+
+TEST_F(SysretTest, BadRsp) {
+ SetRsp(kNonCanonicalRsp);
+ Detach();
+ int status = Wait();
+ EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGBUS)
+ << "status = " << status;
+}
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/tcp_socket.cc b/test/syscalls/linux/tcp_socket.cc
new file mode 100644
index 000000000..e6fe84ded
--- /dev/null
+++ b/test/syscalls/linux/tcp_socket.cc
@@ -0,0 +1,759 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <sys/poll.h>
+#include <sys/socket.h>
+#include <unistd.h>
+#include <limits>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/posix_error.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+PosixErrorOr<sockaddr_storage> InetLoopbackAddr(int family) {
+ struct sockaddr_storage addr;
+ memset(&addr, 0, sizeof(addr));
+ addr.ss_family = family;
+ switch (family) {
+ case AF_INET:
+ reinterpret_cast<struct sockaddr_in*>(&addr)->sin_addr.s_addr =
+ htonl(INADDR_LOOPBACK);
+ break;
+ case AF_INET6:
+ reinterpret_cast<struct sockaddr_in6*>(&addr)->sin6_addr =
+ in6addr_loopback;
+ break;
+ default:
+ return PosixError(EINVAL,
+ absl::StrCat("unknown socket family: ", family));
+ }
+ return addr;
+}
+
+// Fixture for tests parameterized by the address family to use (AF_INET and
+// AF_INET6) when creating sockets.
+class TcpSocketTest : public ::testing::TestWithParam<int> {
+ protected:
+ // Creates three sockets that will be used by test cases -- a listener, one
+ // that connects, and the accepted one.
+ void SetUp() override;
+
+ // Closes the sockets created by SetUp().
+ void TearDown() override;
+
+ // Listening socket.
+ int listener_ = -1;
+
+ // Socket connected via connect().
+ int s_ = -1;
+
+ // Socket connected via accept().
+ int t_ = -1;
+
+ // Initial size of the send buffer.
+ int sendbuf_size_ = -1;
+};
+
+void TcpSocketTest::SetUp() {
+ ASSERT_THAT(listener_ = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP),
+ SyscallSucceeds());
+
+ ASSERT_THAT(s_ = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP),
+ SyscallSucceeds());
+
+ // Initialize address to the loopback one.
+ sockaddr_storage addr =
+ ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam()));
+ socklen_t addrlen = sizeof(addr);
+
+ // Bind to some port then start listening.
+ ASSERT_THAT(
+ bind(listener_, reinterpret_cast<struct sockaddr*>(&addr), addrlen),
+ SyscallSucceeds());
+
+ ASSERT_THAT(listen(listener_, SOMAXCONN), SyscallSucceeds());
+
+ // Get the address we're listening on, then connect to it. We need to do this
+ // because we're allowing the stack to pick a port for us.
+ ASSERT_THAT(getsockname(listener_, reinterpret_cast<struct sockaddr*>(&addr),
+ &addrlen),
+ SyscallSucceeds());
+
+ ASSERT_THAT(RetryEINTR(connect)(s_, reinterpret_cast<struct sockaddr*>(&addr),
+ addrlen),
+ SyscallSucceeds());
+
+ // Get the initial send buffer size.
+ socklen_t optlen = sizeof(sendbuf_size_);
+ ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &sendbuf_size_, &optlen),
+ SyscallSucceeds());
+
+ // Accept the connection.
+ ASSERT_THAT(t_ = RetryEINTR(accept)(listener_, nullptr, nullptr),
+ SyscallSucceeds());
+}
+
+void TcpSocketTest::TearDown() {
+ EXPECT_THAT(close(listener_), SyscallSucceeds());
+ if (s_ >= 0) {
+ EXPECT_THAT(close(s_), SyscallSucceeds());
+ }
+ if (t_ >= 0) {
+ EXPECT_THAT(close(t_), SyscallSucceeds());
+ }
+}
+
+TEST_P(TcpSocketTest, DataCoalesced) {
+ char buf[10];
+
+ // Write in two steps.
+ ASSERT_THAT(RetryEINTR(write)(s_, buf, sizeof(buf) / 2),
+ SyscallSucceedsWithValue(sizeof(buf) / 2));
+ ASSERT_THAT(RetryEINTR(write)(s_, buf, sizeof(buf) / 2),
+ SyscallSucceedsWithValue(sizeof(buf) / 2));
+
+ // Allow stack to process both packets.
+ absl::SleepFor(absl::Seconds(1));
+
+ // Read in one shot.
+ EXPECT_THAT(RetryEINTR(recv)(t_, buf, sizeof(buf), 0),
+ SyscallSucceedsWithValue(sizeof(buf)));
+}
+
+TEST_P(TcpSocketTest, SenderAddressIgnored) {
+ char buf[3];
+ ASSERT_THAT(RetryEINTR(write)(s_, buf, sizeof(buf)),
+ SyscallSucceedsWithValue(sizeof(buf)));
+
+ struct sockaddr_storage addr;
+ socklen_t addrlen = sizeof(addr);
+ memset(&addr, 0, sizeof(addr));
+
+ ASSERT_THAT(
+ RetryEINTR(recvfrom)(t_, buf, sizeof(buf), 0,
+ reinterpret_cast<struct sockaddr*>(&addr), &addrlen),
+ SyscallSucceedsWithValue(3));
+
+ // Check that addr remains zeroed-out.
+ const char* ptr = reinterpret_cast<char*>(&addr);
+ for (size_t i = 0; i < sizeof(addr); i++) {
+ EXPECT_EQ(ptr[i], 0);
+ }
+}
+
+TEST_P(TcpSocketTest, SenderAddressIgnoredOnPeek) {
+ char buf[3];
+ ASSERT_THAT(RetryEINTR(write)(s_, buf, sizeof(buf)),
+ SyscallSucceedsWithValue(sizeof(buf)));
+
+ struct sockaddr_storage addr;
+ socklen_t addrlen = sizeof(addr);
+ memset(&addr, 0, sizeof(addr));
+
+ ASSERT_THAT(
+ RetryEINTR(recvfrom)(t_, buf, sizeof(buf), MSG_PEEK,
+ reinterpret_cast<struct sockaddr*>(&addr), &addrlen),
+ SyscallSucceedsWithValue(3));
+
+ // Check that addr remains zeroed-out.
+ const char* ptr = reinterpret_cast<char*>(&addr);
+ for (size_t i = 0; i < sizeof(addr); i++) {
+ EXPECT_EQ(ptr[i], 0);
+ }
+}
+
+TEST_P(TcpSocketTest, SendtoAddressIgnored) {
+ struct sockaddr_storage addr;
+ memset(&addr, 0, sizeof(addr));
+ addr.ss_family = GetParam(); // FIXME
+
+ char data = '\0';
+ EXPECT_THAT(
+ RetryEINTR(sendto)(s_, &data, sizeof(data), 0,
+ reinterpret_cast<sockaddr*>(&addr), sizeof(addr)),
+ SyscallSucceedsWithValue(1));
+}
+
+TEST_P(TcpSocketTest, WritevZeroIovec) {
+ // 2 bytes just to be safe and have vecs[1] not point to something random
+ // (even though length is 0).
+ char buf[2];
+ char recv_buf[1];
+
+ // Construct a vec where the final vector is of length 0.
+ iovec vecs[2] = {};
+ vecs[0].iov_base = buf;
+ vecs[0].iov_len = 1;
+ vecs[1].iov_base = buf + 1;
+ vecs[1].iov_len = 0;
+
+ EXPECT_THAT(RetryEINTR(writev)(s_, vecs, 2), SyscallSucceedsWithValue(1));
+
+ EXPECT_THAT(RetryEINTR(recv)(t_, recv_buf, 1, 0),
+ SyscallSucceedsWithValue(1));
+ EXPECT_EQ(memcmp(recv_buf, buf, 1), 0);
+}
+
+TEST_P(TcpSocketTest, ZeroWriteAllowed) {
+ char buf[3];
+ // Send a zero length packet.
+ ASSERT_THAT(RetryEINTR(write)(s_, buf, 0), SyscallSucceedsWithValue(0));
+ // Verify that there is no packet available.
+ EXPECT_THAT(RetryEINTR(recv)(t_, buf, sizeof(buf), MSG_DONTWAIT),
+ SyscallFailsWithErrno(EAGAIN));
+}
+
+// Test that a non-blocking write with a buffer that is larger than the send
+// buffer size will not actually write the whole thing at once.
+TEST_P(TcpSocketTest, NonblockingLargeWrite) {
+ // Set the FD to O_NONBLOCK.
+ int opts;
+ ASSERT_THAT(opts = fcntl(s_, F_GETFL), SyscallSucceeds());
+ opts |= O_NONBLOCK;
+ ASSERT_THAT(fcntl(s_, F_SETFL, opts), SyscallSucceeds());
+
+ // Allocate a buffer three times the size of the send buffer. We do this with
+ // a vector to avoid allocating on the stack.
+ int size = 3 * sendbuf_size_;
+ std::vector<char> buf(size);
+
+ // Try to write the whole thing.
+ int n;
+ ASSERT_THAT(n = RetryEINTR(write)(s_, buf.data(), size), SyscallSucceeds());
+
+ // We should have written something, but not the whole thing.
+ EXPECT_GT(n, 0);
+ EXPECT_LT(n, size);
+}
+
+// Test that a blocking write with a buffer that is larger than the send buffer
+// will block until the entire buffer is sent.
+TEST_P(TcpSocketTest, BlockingLargeWrite_NoRandomSave) {
+ // Allocate a buffer three times the size of the send buffer on the heap. We
+ // do this as a vector to avoid allocating on the stack.
+ int size = 3 * sendbuf_size_;
+ std::vector<char> writebuf(size);
+
+ // Start reading the response in a loop.
+ int read_bytes = 0;
+ ScopedThread t([this, &read_bytes]() {
+ // Avoid interrupting the blocking write in main thread.
+ const DisableSave ds;
+ char readbuf[2500] = {};
+ int n = -1;
+ while (n != 0) {
+ EXPECT_THAT(n = RetryEINTR(read)(t_, &readbuf, sizeof(readbuf)),
+ SyscallSucceeds());
+ read_bytes += n;
+ }
+ });
+
+ // Try to write the whole thing.
+ int n;
+ ASSERT_THAT(n = WriteFd(s_, writebuf.data(), size), SyscallSucceeds());
+
+ // We should have written the whole thing.
+ EXPECT_EQ(n, size);
+ EXPECT_THAT(close(s_), SyscallSucceedsWithValue(0));
+ s_ = -1;
+ t.Join();
+
+ // We should have read the whole thing.
+ EXPECT_EQ(read_bytes, size);
+}
+
+// Test that a send with MSG_DONTWAIT flag and buffer that larger than the send
+// buffer size will not write the whole thing.
+TEST_P(TcpSocketTest, LargeSendDontWait) {
+ // Allocate a buffer three times the size of the send buffer. We do this on
+ // with a vector to avoid allocating on the stack.
+ int size = 3 * sendbuf_size_;
+ std::vector<char> buf(size);
+
+ // Try to write the whole thing with MSG_DONTWAIT flag, which can
+ // return a partial write.
+ int n;
+ ASSERT_THAT(n = RetryEINTR(send)(s_, buf.data(), size, MSG_DONTWAIT),
+ SyscallSucceeds());
+
+ // We should have written something, but not the whole thing.
+ EXPECT_GT(n, 0);
+ EXPECT_LT(n, size);
+}
+
+// Test that a send on a non-blocking socket with a buffer that larger than the
+// send buffer will not write the whole thing at once.
+TEST_P(TcpSocketTest, NonblockingLargeSend) {
+ // Set the FD to O_NONBLOCK.
+ int opts;
+ ASSERT_THAT(opts = fcntl(s_, F_GETFL), SyscallSucceeds());
+ opts |= O_NONBLOCK;
+ ASSERT_THAT(fcntl(s_, F_SETFL, opts), SyscallSucceeds());
+
+ // Allocate a buffer three times the size of the send buffer. We do this on
+ // with a vector to avoid allocating on the stack.
+ int size = 3 * sendbuf_size_;
+ std::vector<char> buf(size);
+
+ // Try to write the whole thing.
+ int n;
+ ASSERT_THAT(n = RetryEINTR(send)(s_, buf.data(), size, 0), SyscallSucceeds());
+
+ // We should have written something, but not the whole thing.
+ EXPECT_GT(n, 0);
+ EXPECT_LT(n, size);
+}
+
+// Same test as above, but calls send instead of write.
+TEST_P(TcpSocketTest, BlockingLargeSend_NoRandomSave) {
+ // Allocate a buffer three times the size of the send buffer. We do this on
+ // with a vector to avoid allocating on the stack.
+ int size = 3 * sendbuf_size_;
+ std::vector<char> writebuf(size);
+
+ // Start reading the response in a loop.
+ int read_bytes = 0;
+ ScopedThread t([this, &read_bytes]() {
+ // Avoid interrupting the blocking write in main thread.
+ const DisableSave ds;
+ char readbuf[2500] = {};
+ int n = -1;
+ while (n != 0) {
+ EXPECT_THAT(n = RetryEINTR(read)(t_, &readbuf, sizeof(readbuf)),
+ SyscallSucceeds());
+ read_bytes += n;
+ }
+ });
+
+ // Try to send the whole thing.
+ int n;
+ ASSERT_THAT(n = SendFd(s_, writebuf.data(), size, 0), SyscallSucceeds());
+
+ // We should have written the whole thing.
+ EXPECT_EQ(n, size);
+ EXPECT_THAT(close(s_), SyscallSucceedsWithValue(0));
+ s_ = -1;
+ t.Join();
+
+ // We should have read the whole thing.
+ EXPECT_EQ(read_bytes, size);
+}
+
+// Test that polling on a socket with a full send buffer will block.
+TEST_P(TcpSocketTest, PollWithFullBufferBlocks) {
+ // Set the FD to O_NONBLOCK.
+ int opts;
+ ASSERT_THAT(opts = fcntl(s_, F_GETFL), SyscallSucceeds());
+ opts |= O_NONBLOCK;
+ ASSERT_THAT(fcntl(s_, F_SETFL, opts), SyscallSucceeds());
+
+ // Set TCP_NODELAY, which will cause linux to fill the receive buffer from the
+ // send buffer as quickly as possibly. This way we can fill up both buffers
+ // faster.
+ constexpr int tcp_nodelay_flag = 1;
+ ASSERT_THAT(setsockopt(s_, IPPROTO_TCP, TCP_NODELAY, &tcp_nodelay_flag,
+ sizeof(tcp_nodelay_flag)),
+ SyscallSucceeds());
+
+ // Create a large buffer that will be used for sending.
+ std::vector<char> buf(5 * sendbuf_size_);
+
+ // Write until we receive an error.
+ while (RetryEINTR(send)(s_, buf.data(), buf.size(), 0) != -1) {
+ // Sleep to give linux a chance to move data from the send buffer to the
+ // receive buffer.
+ usleep(10000); // 10ms.
+ }
+ // The last error should have been EWOULDBLOCK.
+ ASSERT_EQ(errno, EWOULDBLOCK);
+}
+
+TEST_P(TcpSocketTest, MsgTrunc) {
+ char sent_data[512];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+ ASSERT_THAT(RetryEINTR(send)(s_, sent_data, sizeof(sent_data), 0),
+ SyscallSucceedsWithValue(sizeof(sent_data)));
+ char received_data[sizeof(sent_data)] = {};
+ ASSERT_THAT(
+ RetryEINTR(recv)(t_, received_data, sizeof(received_data) / 2, MSG_TRUNC),
+ SyscallSucceedsWithValue(sizeof(sent_data) / 2));
+
+ // Check that we didn't get anything.
+ char zeros[sizeof(received_data)] = {};
+ EXPECT_EQ(0, memcmp(zeros, received_data, sizeof(received_data)));
+}
+
+// MSG_CTRUNC is a return flag but linux allows it to be set on input flags
+// without returning an error.
+TEST_P(TcpSocketTest, MsgTruncWithCtrunc) {
+ char sent_data[512];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+ ASSERT_THAT(RetryEINTR(send)(s_, sent_data, sizeof(sent_data), 0),
+ SyscallSucceedsWithValue(sizeof(sent_data)));
+ char received_data[sizeof(sent_data)] = {};
+ ASSERT_THAT(RetryEINTR(recv)(t_, received_data, sizeof(received_data) / 2,
+ MSG_TRUNC | MSG_CTRUNC),
+ SyscallSucceedsWithValue(sizeof(sent_data) / 2));
+
+ // Check that we didn't get anything.
+ char zeros[sizeof(received_data)] = {};
+ EXPECT_EQ(0, memcmp(zeros, received_data, sizeof(received_data)));
+}
+
+// This test will verify that MSG_CTRUNC doesn't do anything when specified
+// on input.
+TEST_P(TcpSocketTest, MsgTruncWithCtruncOnly) {
+ char sent_data[512];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+ ASSERT_THAT(RetryEINTR(send)(s_, sent_data, sizeof(sent_data), 0),
+ SyscallSucceedsWithValue(sizeof(sent_data)));
+ char received_data[sizeof(sent_data)] = {};
+ ASSERT_THAT(RetryEINTR(recv)(t_, received_data, sizeof(received_data) / 2,
+ MSG_CTRUNC),
+ SyscallSucceedsWithValue(sizeof(sent_data) / 2));
+
+ // Since MSG_CTRUNC here had no affect, it should not behave like MSG_TRUNC.
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data) / 2));
+}
+
+TEST_P(TcpSocketTest, MsgTruncLargeSize) {
+ char sent_data[512];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+ ASSERT_THAT(RetryEINTR(send)(s_, sent_data, sizeof(sent_data), 0),
+ SyscallSucceedsWithValue(sizeof(sent_data)));
+ char received_data[sizeof(sent_data) * 2] = {};
+ ASSERT_THAT(
+ RetryEINTR(recv)(t_, received_data, sizeof(received_data), MSG_TRUNC),
+ SyscallSucceedsWithValue(sizeof(sent_data)));
+
+ // Check that we didn't get anything.
+ char zeros[sizeof(received_data)] = {};
+ EXPECT_EQ(0, memcmp(zeros, received_data, sizeof(received_data)));
+}
+
+TEST_P(TcpSocketTest, MsgTruncPeek) {
+ char sent_data[512];
+ RandomizeBuffer(sent_data, sizeof(sent_data));
+ ASSERT_THAT(RetryEINTR(send)(s_, sent_data, sizeof(sent_data), 0),
+ SyscallSucceedsWithValue(sizeof(sent_data)));
+ char received_data[sizeof(sent_data)] = {};
+ ASSERT_THAT(RetryEINTR(recv)(t_, received_data, sizeof(received_data) / 2,
+ MSG_TRUNC | MSG_PEEK),
+ SyscallSucceedsWithValue(sizeof(sent_data) / 2));
+
+ // Check that we didn't get anything.
+ char zeros[sizeof(received_data)] = {};
+ EXPECT_EQ(0, memcmp(zeros, received_data, sizeof(received_data)));
+
+ // Check that we can still get all of the data.
+ ASSERT_THAT(RetryEINTR(recv)(t_, received_data, sizeof(received_data), 0),
+ SyscallSucceedsWithValue(sizeof(sent_data)));
+ EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
+}
+
+TEST_P(TcpSocketTest, NoDelayDefault) {
+ int get = -1;
+ socklen_t get_len = sizeof(get);
+ EXPECT_THAT(getsockopt(s_, IPPROTO_TCP, TCP_NODELAY, &get, &get_len),
+ SyscallSucceedsWithValue(0));
+ EXPECT_EQ(get_len, sizeof(get));
+ EXPECT_EQ(get, kSockOptOff);
+}
+
+TEST_P(TcpSocketTest, SetNoDelay) {
+ ASSERT_THAT(
+ setsockopt(s_, IPPROTO_TCP, TCP_NODELAY, &kSockOptOn, sizeof(kSockOptOn)),
+ SyscallSucceeds());
+
+ int get = -1;
+ socklen_t get_len = sizeof(get);
+ EXPECT_THAT(getsockopt(s_, IPPROTO_TCP, TCP_NODELAY, &get, &get_len),
+ SyscallSucceedsWithValue(0));
+ EXPECT_EQ(get_len, sizeof(get));
+ EXPECT_EQ(get, kSockOptOn);
+
+ ASSERT_THAT(setsockopt(s_, IPPROTO_TCP, TCP_NODELAY, &kSockOptOff,
+ sizeof(kSockOptOff)),
+ SyscallSucceeds());
+
+ EXPECT_THAT(getsockopt(s_, IPPROTO_TCP, TCP_NODELAY, &get, &get_len),
+ SyscallSucceedsWithValue(0));
+ EXPECT_EQ(get_len, sizeof(get));
+ EXPECT_EQ(get, kSockOptOff);
+}
+
+INSTANTIATE_TEST_CASE_P(AllInetTests, TcpSocketTest,
+ ::testing::Values(AF_INET, AF_INET6));
+
+// Fixture for tests parameterized by address family that don't want the fixture
+// to do things.
+using SimpleTcpSocketTest = ::testing::TestWithParam<int>;
+
+TEST_P(SimpleTcpSocketTest, SendUnconnected) {
+ int fd;
+ ASSERT_THAT(fd = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP),
+ SyscallSucceeds());
+ FileDescriptor sock_fd(fd);
+
+ char data = '\0';
+ EXPECT_THAT(RetryEINTR(send)(fd, &data, sizeof(data), 0),
+ SyscallFailsWithErrno(EPIPE));
+}
+
+TEST_P(SimpleTcpSocketTest, SendtoWithoutAddressUnconnected) {
+ int fd;
+ ASSERT_THAT(fd = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP),
+ SyscallSucceeds());
+ FileDescriptor sock_fd(fd);
+
+ char data = '\0';
+ EXPECT_THAT(RetryEINTR(sendto)(fd, &data, sizeof(data), 0, nullptr, 0),
+ SyscallFailsWithErrno(EPIPE));
+}
+
+TEST_P(SimpleTcpSocketTest, SendtoWithAddressUnconnected) {
+ int fd;
+ ASSERT_THAT(fd = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP),
+ SyscallSucceeds());
+ FileDescriptor sock_fd(fd);
+
+ sockaddr_storage addr =
+ ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam()));
+ char data = '\0';
+ EXPECT_THAT(
+ RetryEINTR(sendto)(fd, &data, sizeof(data), 0,
+ reinterpret_cast<sockaddr*>(&addr), sizeof(addr)),
+ SyscallFailsWithErrno(EPIPE));
+}
+
+TEST_P(SimpleTcpSocketTest, GetPeerNameUnconnected) {
+ int fd;
+ ASSERT_THAT(fd = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP),
+ SyscallSucceeds());
+ FileDescriptor sock_fd(fd);
+
+ sockaddr_storage addr;
+ socklen_t addrlen = sizeof(addr);
+ EXPECT_THAT(getpeername(fd, reinterpret_cast<sockaddr*>(&addr), &addrlen),
+ SyscallFailsWithErrno(ENOTCONN));
+}
+
+TEST_P(TcpSocketTest, FullBuffer) {
+ // Set both FDs to be blocking.
+ int flags = 0;
+ ASSERT_THAT(flags = fcntl(s_, F_GETFL), SyscallSucceeds());
+ EXPECT_THAT(fcntl(s_, F_SETFL, flags & ~O_NONBLOCK), SyscallSucceeds());
+ flags = 0;
+ ASSERT_THAT(flags = fcntl(t_, F_GETFL), SyscallSucceeds());
+ EXPECT_THAT(fcntl(t_, F_SETFL, flags & ~O_NONBLOCK), SyscallSucceeds());
+
+ // 2500 was chosen as a small value that can be set on Linux.
+ int set_snd = 2500;
+ EXPECT_THAT(setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &set_snd, sizeof(set_snd)),
+ SyscallSucceedsWithValue(0));
+ int get_snd = -1;
+ socklen_t get_snd_len = sizeof(get_snd);
+ EXPECT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &get_snd, &get_snd_len),
+ SyscallSucceedsWithValue(0));
+ EXPECT_EQ(get_snd_len, sizeof(get_snd));
+ EXPECT_GT(get_snd, 0);
+
+ // 2500 was chosen as a small value that can be set on Linux and gVisor.
+ int set_rcv = 2500;
+ EXPECT_THAT(setsockopt(t_, SOL_SOCKET, SO_RCVBUF, &set_rcv, sizeof(set_rcv)),
+ SyscallSucceedsWithValue(0));
+ int get_rcv = -1;
+ socklen_t get_rcv_len = sizeof(get_rcv);
+ EXPECT_THAT(getsockopt(t_, SOL_SOCKET, SO_RCVBUF, &get_rcv, &get_rcv_len),
+ SyscallSucceedsWithValue(0));
+ EXPECT_EQ(get_rcv_len, sizeof(get_rcv));
+ EXPECT_GE(get_rcv, 2500);
+
+ // Quick sanity test.
+ EXPECT_LT(get_snd + get_rcv, 2500 * IOV_MAX);
+
+ char data[2500] = {};
+ std::vector<struct iovec> iovecs;
+ for (int i = 0; i < IOV_MAX; i++) {
+ struct iovec iov = {};
+ iov.iov_base = data;
+ iov.iov_len = sizeof(data);
+ iovecs.push_back(iov);
+ }
+ ScopedThread t([this, &iovecs]() {
+ int result = -1;
+ EXPECT_THAT(result = RetryEINTR(writev)(s_, iovecs.data(), iovecs.size()),
+ SyscallSucceeds());
+ EXPECT_GT(result, 1);
+ EXPECT_LT(result, sizeof(data) * iovecs.size());
+ });
+
+ char recv = 0;
+ EXPECT_THAT(RetryEINTR(read)(t_, &recv, 1), SyscallSucceedsWithValue(1));
+ EXPECT_THAT(close(t_), SyscallSucceedsWithValue(0));
+ t_ = -1;
+}
+
+TEST_P(SimpleTcpSocketTest, NonBlockingConnectNoListener) {
+ // Initialize address to the loopback one.
+ sockaddr_storage addr =
+ ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam()));
+ socklen_t addrlen = sizeof(addr);
+
+ const FileDescriptor s =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+
+ // Set the FD to O_NONBLOCK.
+ int opts;
+ ASSERT_THAT(opts = fcntl(s.get(), F_GETFL), SyscallSucceeds());
+ opts |= O_NONBLOCK;
+ ASSERT_THAT(fcntl(s.get(), F_SETFL, opts), SyscallSucceeds());
+
+ ASSERT_THAT(RetryEINTR(connect)(
+ s.get(), reinterpret_cast<struct sockaddr*>(&addr), addrlen),
+ SyscallFailsWithErrno(EINPROGRESS));
+
+ // Now polling on the FD with a timeout should return 0 corresponding to no
+ // FDs ready.
+ struct pollfd poll_fd = {s.get(), POLLOUT, 0};
+ EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 10000),
+ SyscallSucceedsWithValue(1));
+
+ int err;
+ socklen_t optlen = sizeof(err);
+ ASSERT_THAT(getsockopt(s.get(), SOL_SOCKET, SO_ERROR, &err, &optlen),
+ SyscallSucceeds());
+
+ EXPECT_EQ(err, ECONNREFUSED);
+}
+
+TEST_P(SimpleTcpSocketTest, NonBlockingConnect) {
+ const FileDescriptor listener =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+
+ // Initialize address to the loopback one.
+ sockaddr_storage addr =
+ ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam()));
+ socklen_t addrlen = sizeof(addr);
+
+ // Bind to some port then start listening.
+ ASSERT_THAT(
+ bind(listener.get(), reinterpret_cast<struct sockaddr*>(&addr), addrlen),
+ SyscallSucceeds());
+
+ ASSERT_THAT(listen(listener.get(), SOMAXCONN), SyscallSucceeds());
+
+ FileDescriptor s =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+
+ // Set the FD to O_NONBLOCK.
+ int opts;
+ ASSERT_THAT(opts = fcntl(s.get(), F_GETFL), SyscallSucceeds());
+ opts |= O_NONBLOCK;
+ ASSERT_THAT(fcntl(s.get(), F_SETFL, opts), SyscallSucceeds());
+
+ ASSERT_THAT(getsockname(listener.get(),
+ reinterpret_cast<struct sockaddr*>(&addr), &addrlen),
+ SyscallSucceeds());
+
+ ASSERT_THAT(RetryEINTR(connect)(
+ s.get(), reinterpret_cast<struct sockaddr*>(&addr), addrlen),
+ SyscallFailsWithErrno(EINPROGRESS));
+
+ int t;
+ ASSERT_THAT(t = RetryEINTR(accept)(listener.get(), nullptr, nullptr),
+ SyscallSucceeds());
+
+ // Now polling on the FD with a timeout should return 0 corresponding to no
+ // FDs ready.
+ struct pollfd poll_fd = {s.get(), POLLOUT, 0};
+ EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 10000),
+ SyscallSucceedsWithValue(1));
+
+ int err;
+ socklen_t optlen = sizeof(err);
+ ASSERT_THAT(getsockopt(s.get(), SOL_SOCKET, SO_ERROR, &err, &optlen),
+ SyscallSucceeds());
+
+ EXPECT_EQ(err, 0);
+
+ EXPECT_THAT(close(t), SyscallSucceeds());
+}
+
+// Test that we get an ECONNREFUSED with a blocking socket when no one is
+// listening on the other end.
+TEST_P(SimpleTcpSocketTest, BlockingConnectRefused) {
+ FileDescriptor s =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+
+ // Initialize address to the loopback one.
+ sockaddr_storage addr =
+ ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam()));
+ socklen_t addrlen = sizeof(addr);
+
+ ASSERT_THAT(RetryEINTR(connect)(
+ s.get(), reinterpret_cast<struct sockaddr*>(&addr), addrlen),
+ SyscallFailsWithErrno(ECONNREFUSED));
+
+ // Avoiding triggering save in destructor of s.
+ EXPECT_THAT(close(s.release()), SyscallSucceeds());
+}
+
+// Test that we get an ECONNREFUSED with a nonblocking socket.
+TEST_P(SimpleTcpSocketTest, NonBlockingConnectRefused) {
+ FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
+ Socket(GetParam(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP));
+
+ // Initialize address to the loopback one.
+ sockaddr_storage addr =
+ ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam()));
+ socklen_t addrlen = sizeof(addr);
+
+ ASSERT_THAT(RetryEINTR(connect)(
+ s.get(), reinterpret_cast<struct sockaddr*>(&addr), addrlen),
+ SyscallFailsWithErrno(EINPROGRESS));
+
+ // We don't need to specify any events to get POLLHUP or POLLERR as these
+ // are added before the poll.
+ struct pollfd poll_fd = {s.get(), /*events=*/0, 0};
+ EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 1000), SyscallSucceedsWithValue(1));
+
+ // The ECONNREFUSED should cause us to be woken up with POLLHUP.
+ EXPECT_NE(poll_fd.revents & (POLLHUP | POLLERR), 0);
+
+ // Avoiding triggering save in destructor of s.
+ EXPECT_THAT(close(s.release()), SyscallSucceeds());
+}
+
+INSTANTIATE_TEST_CASE_P(AllInetTests, SimpleTcpSocketTest,
+ ::testing::Values(AF_INET, AF_INET6));
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/temp_umask.h b/test/syscalls/linux/temp_umask.h
new file mode 100644
index 000000000..f202dfa59
--- /dev/null
+++ b/test/syscalls/linux/temp_umask.h
@@ -0,0 +1,39 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_TEMP_UMASK_H_
+#define GVISOR_TEST_SYSCALLS_TEMP_UMASK_H_
+
+#include <sys/stat.h>
+#include <sys/types.h>
+
+namespace gvisor {
+namespace testing {
+
+class TempUmask {
+ public:
+ // Sets the process umask to `mask`.
+ explicit TempUmask(mode_t mask) : old_mask_(umask(mask)) {}
+
+ // Sets the process umask to its previous value.
+ ~TempUmask() { umask(old_mask_); }
+
+ private:
+ mode_t old_mask_;
+};
+
+} // namespace testing
+} // namespace gvisor
+
+#endif // GVISOR_TEST_SYSCALLS_TEMP_UMASK_H_
diff --git a/test/syscalls/linux/tgkill.cc b/test/syscalls/linux/tgkill.cc
new file mode 100644
index 000000000..2d258ef11
--- /dev/null
+++ b/test/syscalls/linux/tgkill.cc
@@ -0,0 +1,48 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(TgkillTest, InvalidTID) {
+ EXPECT_THAT(tgkill(getpid(), -1, 0), SyscallFailsWithErrno(EINVAL));
+ EXPECT_THAT(tgkill(getpid(), 0, 0), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(TgkillTest, InvalidTGID) {
+ EXPECT_THAT(tgkill(-1, gettid(), 0), SyscallFailsWithErrno(EINVAL));
+ EXPECT_THAT(tgkill(0, gettid(), 0), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(TgkillTest, ValidInput) {
+ EXPECT_THAT(tgkill(getpid(), gettid(), 0), SyscallSucceeds());
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/time.cc b/test/syscalls/linux/time.cc
new file mode 100644
index 000000000..3abcd8098
--- /dev/null
+++ b/test/syscalls/linux/time.cc
@@ -0,0 +1,103 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <time.h>
+
+#include "gtest/gtest.h"
+#include "test/util/proc_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+constexpr long kFudgeSeconds = 5;
+
+// Mimics the time(2) wrapper from glibc prior to 2.15.
+time_t vsyscall_time(time_t* t) {
+ constexpr uint64_t kVsyscallTimeEntry = 0xffffffffff600400;
+ return reinterpret_cast<time_t (*)(time_t*)>(kVsyscallTimeEntry)(t);
+}
+
+TEST(TimeTest, VsyscallTime_Succeeds) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsVsyscallEnabled()));
+
+ time_t t1, t2;
+
+ {
+ const DisableSave ds; // Timing assertions.
+ EXPECT_THAT(time(&t1), SyscallSucceeds());
+ EXPECT_THAT(vsyscall_time(&t2), SyscallSucceeds());
+ }
+
+ // Time should be monotonic.
+ EXPECT_LE(static_cast<long>(t1), static_cast<long>(t2));
+
+ // Check that it's within kFudge seconds.
+ EXPECT_LE(static_cast<long>(t2), static_cast<long>(t1) + kFudgeSeconds);
+
+ // Redo with save.
+ EXPECT_THAT(time(&t1), SyscallSucceeds());
+ EXPECT_THAT(vsyscall_time(&t2), SyscallSucceeds());
+
+ // Time should be monotonic.
+ EXPECT_LE(static_cast<long>(t1), static_cast<long>(t2));
+}
+
+TEST(TimeTest, VsyscallTime_InvalidAddressSIGSEGV) {
+ EXPECT_EXIT(vsyscall_time(reinterpret_cast<time_t*>(0x1)),
+ ::testing::KilledBySignal(SIGSEGV), "");
+}
+int vsyscall_gettimeofday(struct timeval* tv, struct timezone* tz) {
+ constexpr uint64_t kVsyscallGettimeofdayEntry = 0xffffffffff600000;
+ return reinterpret_cast<int (*)(struct timeval*, struct timezone*)>(
+ kVsyscallGettimeofdayEntry)(tv, tz);
+}
+
+TEST(TimeTest, VsyscallGettimeofday_Succeeds) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsVsyscallEnabled()));
+
+ struct timeval tv1, tv2;
+ struct timezone tz1, tz2;
+
+ {
+ const DisableSave ds; // Timing assertions.
+ EXPECT_THAT(gettimeofday(&tv1, &tz1), SyscallSucceeds());
+ EXPECT_THAT(vsyscall_gettimeofday(&tv2, &tz2), SyscallSucceeds());
+ }
+
+ // See above.
+ EXPECT_LE(static_cast<long>(tv1.tv_sec), static_cast<long>(tv2.tv_sec));
+ EXPECT_LE(static_cast<long>(tv2.tv_sec),
+ static_cast<long>(tv1.tv_sec) + kFudgeSeconds);
+
+ // Redo with save.
+ EXPECT_THAT(gettimeofday(&tv1, &tz1), SyscallSucceeds());
+ EXPECT_THAT(vsyscall_gettimeofday(&tv2, &tz2), SyscallSucceeds());
+}
+
+TEST(TimeTest, VsyscallGettimeofday_InvalidAddressSIGSEGV) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsVsyscallEnabled()));
+
+ EXPECT_EXIT(vsyscall_gettimeofday(reinterpret_cast<struct timeval*>(0x1),
+ reinterpret_cast<struct timezone*>(0x1)),
+ ::testing::KilledBySignal(SIGSEGV), "");
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/timerfd.cc b/test/syscalls/linux/timerfd.cc
new file mode 100644
index 000000000..b85321795
--- /dev/null
+++ b/test/syscalls/linux/timerfd.cc
@@ -0,0 +1,238 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <poll.h>
+#include <sys/timerfd.h>
+#include <time.h>
+
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/posix_error.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Wrapper around timerfd_create(2) that returns a FileDescriptor.
+PosixErrorOr<FileDescriptor> TimerfdCreate(int clockid, int flags) {
+ int fd = timerfd_create(clockid, flags);
+ MaybeSave();
+ if (fd < 0) {
+ return PosixError(errno, "timerfd_create failed");
+ }
+ return FileDescriptor(fd);
+}
+
+// In tests that race a timerfd with a sleep, some slack is required because:
+//
+// - Timerfd expirations are asynchronous with respect to nanosleeps.
+//
+// - Because clock_gettime(CLOCK_MONOTONIC) is implemented through the VDSO,
+// it technically uses a closely-related, but distinct, time domain from the
+// CLOCK_MONOTONIC used to trigger timerfd expirations.
+absl::Duration TimerSlack() { return absl::Milliseconds(500); }
+
+TEST(TimerfdTest, IsInitiallyStopped) {
+ auto const tfd = ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(CLOCK_MONOTONIC, 0));
+ struct itimerspec its = {};
+ ASSERT_THAT(timerfd_gettime(tfd.get(), &its), SyscallSucceeds());
+ EXPECT_EQ(0, its.it_value.tv_sec);
+ EXPECT_EQ(0, its.it_value.tv_nsec);
+}
+
+TEST(TimerfdTest, SingleShot) {
+ constexpr absl::Duration kDelay = absl::Seconds(1);
+
+ auto const tfd = ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(CLOCK_MONOTONIC, 0));
+ struct itimerspec its = {};
+ its.it_value = absl::ToTimespec(kDelay);
+ ASSERT_THAT(timerfd_settime(tfd.get(), /* flags = */ 0, &its, nullptr),
+ SyscallSucceeds());
+
+ // The timer should fire exactly once since the interval is zero.
+ absl::SleepFor(kDelay + TimerSlack());
+ uint64_t val = 0;
+ ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)),
+ SyscallSucceedsWithValue(sizeof(uint64_t)));
+ EXPECT_EQ(1, val);
+}
+
+TEST(TimerfdTest, Periodic) {
+ constexpr absl::Duration kDelay = absl::Seconds(1);
+ constexpr int kPeriods = 3;
+
+ auto const tfd = ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(CLOCK_MONOTONIC, 0));
+ struct itimerspec its = {};
+ its.it_value = absl::ToTimespec(kDelay);
+ its.it_interval = absl::ToTimespec(kDelay);
+ ASSERT_THAT(timerfd_settime(tfd.get(), /* flags = */ 0, &its, nullptr),
+ SyscallSucceeds());
+
+ // Expect to see at least kPeriods expirations. More may occur due to the
+ // timer slack, or due to delays from scheduling or save/restore.
+ absl::SleepFor(kPeriods * kDelay + TimerSlack());
+ uint64_t val = 0;
+ ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)),
+ SyscallSucceedsWithValue(sizeof(uint64_t)));
+ EXPECT_GE(val, kPeriods);
+}
+
+TEST(TimerfdTest, BlockingRead) {
+ constexpr absl::Duration kDelay = absl::Seconds(3);
+
+ auto const tfd = ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(CLOCK_MONOTONIC, 0));
+ struct itimerspec its = {};
+ its.it_value.tv_sec = absl::ToInt64Seconds(kDelay);
+ auto const start_time = absl::Now();
+ ASSERT_THAT(timerfd_settime(tfd.get(), /* flags = */ 0, &its, nullptr),
+ SyscallSucceeds());
+
+ // read should block until the timer fires.
+ uint64_t val = 0;
+ ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)),
+ SyscallSucceedsWithValue(sizeof(uint64_t)));
+ auto const end_time = absl::Now();
+ EXPECT_EQ(1, val);
+ EXPECT_GE((end_time - start_time) + TimerSlack(), kDelay);
+}
+
+TEST(TimerfdTest, NonblockingRead_NoRandomSave) {
+ constexpr absl::Duration kDelay = absl::Seconds(5);
+
+ auto const tfd =
+ ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(CLOCK_MONOTONIC, TFD_NONBLOCK));
+
+ // Since the timer is initially disabled and has never fired, read should
+ // return EAGAIN.
+ uint64_t val = 0;
+ ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)),
+ SyscallFailsWithErrno(EAGAIN));
+
+ DisableSave ds; // Timing-sensitive.
+
+ // Arm the timer.
+ struct itimerspec its = {};
+ its.it_value.tv_sec = absl::ToInt64Seconds(kDelay);
+ ASSERT_THAT(timerfd_settime(tfd.get(), /* flags = */ 0, &its, nullptr),
+ SyscallSucceeds());
+
+ // Since the timer has not yet fired, read should return EAGAIN.
+ ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)),
+ SyscallFailsWithErrno(EAGAIN));
+
+ ds.reset(); // No longer timing-sensitive.
+
+ // After the timer fires, read should indicate 1 expiration.
+ absl::SleepFor(kDelay + TimerSlack());
+ ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)),
+ SyscallSucceedsWithValue(sizeof(uint64_t)));
+ EXPECT_EQ(1, val);
+
+ // The successful read should have reset the number of expirations.
+ ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)),
+ SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST(TimerfdTest, BlockingPoll_SetTimeResetsExpirations) {
+ constexpr absl::Duration kDelay = absl::Seconds(3);
+
+ auto const tfd =
+ ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(CLOCK_MONOTONIC, TFD_NONBLOCK));
+ struct itimerspec its = {};
+ its.it_value.tv_sec = absl::ToInt64Seconds(kDelay);
+ auto const start_time = absl::Now();
+ ASSERT_THAT(timerfd_settime(tfd.get(), /* flags = */ 0, &its, nullptr),
+ SyscallSucceeds());
+
+ // poll should block until the timer fires.
+ struct pollfd pfd = {};
+ pfd.fd = tfd.get();
+ pfd.events = POLLIN;
+ ASSERT_THAT(poll(&pfd, /* nfds = */ 1,
+ /* timeout = */ 2 * absl::ToInt64Seconds(kDelay) * 1000),
+ SyscallSucceedsWithValue(1));
+ auto const end_time = absl::Now();
+ EXPECT_EQ(POLLIN, pfd.revents);
+ EXPECT_GE((end_time - start_time) + TimerSlack(), kDelay);
+
+ // Call timerfd_settime again with a value of 0. This should reset the number
+ // of expirations to 0, causing read to return EAGAIN since the timerfd is
+ // non-blocking.
+ its.it_value.tv_sec = 0;
+ ASSERT_THAT(timerfd_settime(tfd.get(), /* flags = */ 0, &its, nullptr),
+ SyscallSucceeds());
+ uint64_t val = 0;
+ ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)),
+ SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST(TimerfdTest, SetAbsoluteTime) {
+ constexpr absl::Duration kDelay = absl::Seconds(3);
+
+ // Use a non-blocking timerfd so that if TFD_TIMER_ABSTIME is incorrectly
+ // non-functional, we get EAGAIN rather than a test timeout.
+ auto const tfd =
+ ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(CLOCK_MONOTONIC, TFD_NONBLOCK));
+ struct itimerspec its = {};
+ ASSERT_THAT(clock_gettime(CLOCK_MONOTONIC, &its.it_value), SyscallSucceeds());
+ its.it_value.tv_sec += absl::ToInt64Seconds(kDelay);
+ ASSERT_THAT(timerfd_settime(tfd.get(), TFD_TIMER_ABSTIME, &its, nullptr),
+ SyscallSucceeds());
+
+ absl::SleepFor(kDelay + TimerSlack());
+ uint64_t val = 0;
+ ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)),
+ SyscallSucceedsWithValue(sizeof(uint64_t)));
+ EXPECT_EQ(1, val);
+}
+
+TEST(TimerfdTest, ClockRealtime) {
+ // Since CLOCK_REALTIME can, by definition, change, we can't make any
+ // non-flaky assertions about the amount of time it takes for a
+ // CLOCK_REALTIME-based timer to expire. Just check that it expires at all,
+ // and hope it happens before the test times out.
+ constexpr int kDelaySecs = 1;
+
+ auto const tfd = ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(CLOCK_REALTIME, 0));
+ struct itimerspec its = {};
+ its.it_value.tv_sec = kDelaySecs;
+ ASSERT_THAT(timerfd_settime(tfd.get(), /* flags = */ 0, &its, nullptr),
+ SyscallSucceeds());
+
+ uint64_t val = 0;
+ ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)),
+ SyscallSucceedsWithValue(sizeof(uint64_t)));
+ EXPECT_EQ(1, val);
+}
+
+TEST(TimerfdTest, IllegalReadWrite) {
+ auto const tfd =
+ ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(CLOCK_MONOTONIC, TFD_NONBLOCK));
+ uint64_t val = 0;
+ EXPECT_THAT(PreadFd(tfd.get(), &val, sizeof(val), 0),
+ SyscallFailsWithErrno(ESPIPE));
+ EXPECT_THAT(WriteFd(tfd.get(), &val, sizeof(val)),
+ SyscallFailsWithErrno(EINVAL));
+ EXPECT_THAT(PwriteFd(tfd.get(), &val, sizeof(val), 0),
+ SyscallFailsWithErrno(ESPIPE));
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/timers.cc b/test/syscalls/linux/timers.cc
new file mode 100644
index 000000000..dfe231575
--- /dev/null
+++ b/test/syscalls/linux/timers.cc
@@ -0,0 +1,642 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <signal.h>
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <syscall.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <atomic>
+
+#include "gtest/gtest.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/cleanup.h"
+#include "test/util/logging.h"
+#include "test/util/multiprocess_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+DEFINE_bool(timers_test_sleep, false,
+ "If true, sleep forever instead of running tests.");
+
+using ::testing::_;
+using ::testing::AnyOf;
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+#ifndef CPUCLOCK_PROF
+#define CPUCLOCK_PROF 0
+#endif // CPUCLOCK_PROF
+
+PosixErrorOr<absl::Duration> ProcessCPUTime(pid_t pid) {
+ // Use pid-specific CPUCLOCK_PROF, which is the clock used to enforce
+ // RLIMIT_CPU.
+ clockid_t clockid = (~static_cast<clockid_t>(pid) << 3) | CPUCLOCK_PROF;
+
+ struct timespec ts;
+ int ret = clock_gettime(clockid, &ts);
+ if (ret < 0) {
+ return PosixError(errno, "clock_gettime failed");
+ }
+
+ return absl::DurationFromTimespec(ts);
+}
+
+void NoopSignalHandler(int signo) {
+ TEST_CHECK_MSG(SIGXCPU == signo,
+ "NoopSigHandler did not receive expected signal");
+}
+
+void UninstallingSignalHandler(int signo) {
+ TEST_CHECK_MSG(SIGXCPU == signo,
+ "UninstallingSignalHandler did not receive expected signal");
+ struct sigaction rev_action;
+ rev_action.sa_handler = SIG_DFL;
+ rev_action.sa_flags = 0;
+ sigemptyset(&rev_action.sa_mask);
+ sigaction(SIGXCPU, &rev_action, nullptr);
+}
+
+TEST(TimerTest, ProcessKilledOnCPUSoftLimit) {
+ constexpr absl::Duration kSoftLimit = absl::Seconds(1);
+ constexpr absl::Duration kHardLimit = absl::Seconds(3);
+
+ struct rlimit cpu_limits;
+ cpu_limits.rlim_cur = absl::ToInt64Seconds(kSoftLimit);
+ cpu_limits.rlim_max = absl::ToInt64Seconds(kHardLimit);
+
+ int pid = fork();
+ MaybeSave();
+ if (pid == 0) {
+ TEST_PCHECK(setrlimit(RLIMIT_CPU, &cpu_limits) == 0);
+ MaybeSave();
+ for (;;) {
+ }
+ }
+ auto c = Cleanup([pid] {
+ int status;
+ EXPECT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+ EXPECT_TRUE(WIFSIGNALED(status));
+ EXPECT_EQ(WTERMSIG(status), SIGXCPU);
+ });
+
+ // Wait for the child to exit, but do not reap it. This will allow us to check
+ // its CPU usage while it is zombied.
+ EXPECT_THAT(waitid(P_PID, pid, nullptr, WEXITED | WNOWAIT),
+ SyscallSucceeds());
+
+ // Assert that the child spent 1s of CPU before getting killed.
+ //
+ // We must be careful to use CPUCLOCK_PROF, the same clock used for RLIMIT_CPU
+ // enforcement, to get correct results. Note that this is slightly different
+ // from rusage-reported CPU usage:
+ //
+ // RLIMIT_CPU, CPUCLOCK_PROF use kernel/sched/cputime.c:thread_group_cputime.
+ // rusage uses kernel/sched/cputime.c:thread_group_cputime_adjusted.
+ absl::Duration cpu = ASSERT_NO_ERRNO_AND_VALUE(ProcessCPUTime(pid));
+ EXPECT_GE(cpu, kSoftLimit);
+
+ // Child did not make it to the hard limit.
+ //
+ // Linux sends SIGXCPU synchronously with CPU tick updates. See
+ // kernel/time/timer.c:update_process_times:
+ // => account_process_tick // update task CPU usage.
+ // => run_posix_cpu_timers // enforce RLIMIT_CPU, sending signal.
+ //
+ // Thus, only chance for this to flake is if the system time required to
+ // deliver the signal exceeds 2s.
+ EXPECT_LT(cpu, kHardLimit);
+}
+
+TEST(TimerTest, ProcessPingedRepeatedlyAfterCPUSoftLimit) {
+ struct sigaction new_action;
+ new_action.sa_handler = UninstallingSignalHandler;
+ new_action.sa_flags = 0;
+ sigemptyset(&new_action.sa_mask);
+
+ constexpr absl::Duration kSoftLimit = absl::Seconds(1);
+ constexpr absl::Duration kHardLimit = absl::Seconds(10);
+
+ struct rlimit cpu_limits;
+ cpu_limits.rlim_cur = absl::ToInt64Seconds(kSoftLimit);
+ cpu_limits.rlim_max = absl::ToInt64Seconds(kHardLimit);
+
+ int pid = fork();
+ MaybeSave();
+ if (pid == 0) {
+ TEST_PCHECK(sigaction(SIGXCPU, &new_action, nullptr) == 0);
+ MaybeSave();
+ TEST_PCHECK(setrlimit(RLIMIT_CPU, &cpu_limits) == 0);
+ MaybeSave();
+ for (;;) {
+ }
+ }
+ auto c = Cleanup([pid] {
+ int status;
+ EXPECT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+ EXPECT_TRUE(WIFSIGNALED(status));
+ EXPECT_EQ(WTERMSIG(status), SIGXCPU);
+ });
+
+ // Wait for the child to exit, but do not reap it. This will allow us to check
+ // its CPU usage while it is zombied.
+ EXPECT_THAT(waitid(P_PID, pid, nullptr, WEXITED | WNOWAIT),
+ SyscallSucceeds());
+
+ absl::Duration cpu = ASSERT_NO_ERRNO_AND_VALUE(ProcessCPUTime(pid));
+ // Following signals come every CPU second.
+ EXPECT_GE(cpu, kSoftLimit + absl::Seconds(1));
+
+ // Child did not make it to the hard limit.
+ //
+ // As above, should not flake.
+ EXPECT_LT(cpu, kHardLimit);
+}
+
+TEST(TimerTest, ProcessKilledOnCPUHardLimit) {
+ struct sigaction new_action;
+ new_action.sa_handler = NoopSignalHandler;
+ new_action.sa_flags = 0;
+ sigemptyset(&new_action.sa_mask);
+
+ constexpr absl::Duration kSoftLimit = absl::Seconds(1);
+ constexpr absl::Duration kHardLimit = absl::Seconds(3);
+
+ struct rlimit cpu_limits;
+ cpu_limits.rlim_cur = absl::ToInt64Seconds(kSoftLimit);
+ cpu_limits.rlim_max = absl::ToInt64Seconds(kHardLimit);
+
+ int pid = fork();
+ MaybeSave();
+ if (pid == 0) {
+ TEST_PCHECK(sigaction(SIGXCPU, &new_action, nullptr) == 0);
+ MaybeSave();
+ TEST_PCHECK(setrlimit(RLIMIT_CPU, &cpu_limits) == 0);
+ MaybeSave();
+ for (;;) {
+ }
+ }
+ auto c = Cleanup([pid] {
+ int status;
+ EXPECT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid));
+ EXPECT_TRUE(WIFSIGNALED(status));
+ EXPECT_EQ(WTERMSIG(status), SIGKILL);
+ });
+
+ // Wait for the child to exit, but do not reap it. This will allow us to check
+ // its CPU usage while it is zombied.
+ EXPECT_THAT(waitid(P_PID, pid, nullptr, WEXITED | WNOWAIT),
+ SyscallSucceeds());
+
+ absl::Duration cpu = ASSERT_NO_ERRNO_AND_VALUE(ProcessCPUTime(pid));
+ EXPECT_GE(cpu, kHardLimit);
+}
+
+// RAII type for a kernel "POSIX" interval timer. (The kernel provides system
+// calls such as timer_create that behave very similarly, but not identically,
+// to those described by timer_create(2); in particular, the kernel does not
+// implement SIGEV_THREAD. glibc builds POSIX-compliant interval timers based on
+// these kernel interval timers.)
+//
+// Compare implementation to FileDescriptor.
+class IntervalTimer {
+ public:
+ IntervalTimer() = default;
+
+ explicit IntervalTimer(int id) { set_id(id); }
+
+ IntervalTimer(IntervalTimer&& orig) : id_(orig.release()) {}
+
+ IntervalTimer& operator=(IntervalTimer&& orig) {
+ if (this == &orig) return *this;
+ reset(orig.release());
+ return *this;
+ }
+
+ IntervalTimer(const IntervalTimer& other) = delete;
+ IntervalTimer& operator=(const IntervalTimer& other) = delete;
+
+ ~IntervalTimer() { reset(); }
+
+ int get() const { return id_; }
+
+ int release() {
+ int const id = id_;
+ id_ = -1;
+ return id;
+ }
+
+ void reset() { reset(-1); }
+
+ void reset(int id) {
+ if (id_ >= 0) {
+ TEST_PCHECK(syscall(SYS_timer_delete, id_) == 0);
+ MaybeSave();
+ }
+ set_id(id);
+ }
+
+ PosixErrorOr<struct itimerspec> Set(
+ int flags, const struct itimerspec& new_value) const {
+ struct itimerspec old_value = {};
+ if (syscall(SYS_timer_settime, id_, flags, &new_value, &old_value) < 0) {
+ return PosixError(errno, "timer_settime");
+ }
+ MaybeSave();
+ return old_value;
+ }
+
+ PosixErrorOr<struct itimerspec> Get() const {
+ struct itimerspec curr_value = {};
+ if (syscall(SYS_timer_gettime, id_, &curr_value) < 0) {
+ return PosixError(errno, "timer_gettime");
+ }
+ MaybeSave();
+ return curr_value;
+ }
+
+ PosixErrorOr<int> Overruns() const {
+ int rv = syscall(SYS_timer_getoverrun, id_);
+ if (rv < 0) {
+ return PosixError(errno, "timer_getoverrun");
+ }
+ MaybeSave();
+ return rv;
+ }
+
+ private:
+ void set_id(int id) { id_ = std::max(id, -1); }
+
+ // Kernel timer_t is int; glibc timer_t is void*.
+ int id_ = -1;
+};
+
+PosixErrorOr<IntervalTimer> TimerCreate(clockid_t clockid,
+ const struct sigevent& sev) {
+ int timerid;
+ if (syscall(SYS_timer_create, clockid, &sev, &timerid) < 0) {
+ return PosixError(errno, "timer_create");
+ }
+ MaybeSave();
+ return IntervalTimer(timerid);
+}
+
+// See timerfd.cc:TimerSlack() for rationale.
+constexpr absl::Duration kTimerSlack = absl::Milliseconds(500);
+
+TEST(IntervalTimerTest, IsInitiallyStopped) {
+ struct sigevent sev = {};
+ sev.sigev_notify = SIGEV_NONE;
+ const auto timer =
+ ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev));
+ const struct itimerspec its = ASSERT_NO_ERRNO_AND_VALUE(timer.Get());
+ EXPECT_EQ(0, its.it_value.tv_sec);
+ EXPECT_EQ(0, its.it_value.tv_nsec);
+}
+
+TEST(IntervalTimerTest, SingleShotSilent) {
+ struct sigevent sev = {};
+ sev.sigev_notify = SIGEV_NONE;
+ const auto timer =
+ ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev));
+
+ constexpr absl::Duration kDelay = absl::Seconds(1);
+ struct itimerspec its = {};
+ its.it_value = absl::ToTimespec(kDelay);
+ ASSERT_NO_ERRNO(timer.Set(0, its));
+
+ // The timer should count down to 0 and stop since the interval is zero. No
+ // overruns should be counted.
+ absl::SleepFor(kDelay + kTimerSlack);
+ its = ASSERT_NO_ERRNO_AND_VALUE(timer.Get());
+ EXPECT_EQ(0, its.it_value.tv_sec);
+ EXPECT_EQ(0, its.it_value.tv_nsec);
+ EXPECT_THAT(timer.Overruns(), IsPosixErrorOkAndHolds(0));
+}
+
+TEST(IntervalTimerTest, PeriodicSilent) {
+ struct sigevent sev = {};
+ sev.sigev_notify = SIGEV_NONE;
+ const auto timer =
+ ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev));
+
+ constexpr absl::Duration kPeriod = absl::Seconds(1);
+ struct itimerspec its = {};
+ its.it_value = its.it_interval = absl::ToTimespec(kPeriod);
+ ASSERT_NO_ERRNO(timer.Set(0, its));
+
+ absl::SleepFor(kPeriod * 3 + kTimerSlack);
+
+ // The timer should still be running.
+ its = ASSERT_NO_ERRNO_AND_VALUE(timer.Get());
+ EXPECT_TRUE(its.it_value.tv_nsec != 0 || its.it_value.tv_sec != 0);
+
+ // Timer expirations are not counted as overruns under SIGEV_NONE.
+ EXPECT_THAT(timer.Overruns(), IsPosixErrorOkAndHolds(0));
+}
+
+std::atomic<int> counted_signals;
+
+void IntervalTimerCountingSignalHandler(int sig, siginfo_t* info,
+ void* ucontext) {
+ counted_signals.fetch_add(1 + info->si_overrun);
+}
+
+TEST(IntervalTimerTest, PeriodicGroupDirectedSignal) {
+ constexpr int kSigno = SIGUSR1;
+ constexpr int kSigvalue = 42;
+
+ // Install our signal handler.
+ counted_signals.store(0);
+ struct sigaction sa = {};
+ sa.sa_sigaction = IntervalTimerCountingSignalHandler;
+ sigemptyset(&sa.sa_mask);
+ sa.sa_flags = SA_SIGINFO;
+ const auto scoped_sigaction =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(kSigno, sa));
+
+ // Ensure that kSigno is unblocked on at least one thread.
+ const auto scoped_sigmask =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, kSigno));
+
+ struct sigevent sev = {};
+ sev.sigev_notify = SIGEV_SIGNAL;
+ sev.sigev_signo = kSigno;
+ sev.sigev_value.sival_int = kSigvalue;
+ auto timer = ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev));
+
+ constexpr absl::Duration kPeriod = absl::Seconds(1);
+ constexpr int kCycles = 3;
+ struct itimerspec its = {};
+ its.it_value = its.it_interval = absl::ToTimespec(kPeriod);
+ ASSERT_NO_ERRNO(timer.Set(0, its));
+
+ absl::SleepFor(kPeriod * kCycles + kTimerSlack);
+ EXPECT_GE(counted_signals.load(), kCycles);
+}
+
+// From Linux's include/uapi/asm-generic/siginfo.h.
+#ifndef sigev_notify_thread_id
+#define sigev_notify_thread_id _sigev_un._tid
+#endif
+
+TEST(IntervalTimerTest, PeriodicThreadDirectedSignal) {
+ constexpr int kSigno = SIGUSR1;
+ constexpr int kSigvalue = 42;
+
+ // Block kSigno so that we can accumulate overruns.
+ sigset_t mask;
+ sigemptyset(&mask);
+ sigaddset(&mask, kSigno);
+ const auto scoped_sigmask =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, mask));
+
+ struct sigevent sev = {};
+ sev.sigev_notify = SIGEV_THREAD_ID;
+ sev.sigev_signo = kSigno;
+ sev.sigev_value.sival_int = kSigvalue;
+ sev.sigev_notify_thread_id = gettid();
+ auto timer = ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev));
+
+ constexpr absl::Duration kPeriod = absl::Seconds(1);
+ constexpr int kCycles = 3;
+ struct itimerspec its = {};
+ its.it_value = its.it_interval = absl::ToTimespec(kPeriod);
+ ASSERT_NO_ERRNO(timer.Set(0, its));
+ absl::SleepFor(kPeriod * kCycles + kTimerSlack);
+
+ // At least kCycles expirations should have occurred, resulting in kCycles-1
+ // overruns (the first expiration sent the signal successfully).
+ siginfo_t si;
+ struct timespec zero_ts = absl::ToTimespec(absl::ZeroDuration());
+ ASSERT_THAT(sigtimedwait(&mask, &si, &zero_ts),
+ SyscallSucceedsWithValue(kSigno));
+ EXPECT_EQ(si.si_signo, kSigno);
+ EXPECT_EQ(si.si_code, SI_TIMER);
+ EXPECT_EQ(si.si_timerid, timer.get());
+ EXPECT_GE(si.si_overrun, kCycles - 1);
+ EXPECT_EQ(si.si_int, kSigvalue);
+
+ // Kill the timer, then drain any additional signal it may have enqueued. We
+ // can't do this before the preceding sigtimedwait because stopping or
+ // deleting the timer resets si_overrun to 0.
+ timer.reset();
+ sigtimedwait(&mask, &si, &zero_ts);
+}
+
+TEST(IntervalTimerTest, OtherThreadGroup) {
+ constexpr int kSigno = SIGUSR1;
+
+ // Create a subprocess that does nothing until killed.
+ pid_t child_pid;
+ const auto sp = ASSERT_NO_ERRNO_AND_VALUE(ForkAndExec(
+ "/proc/self/exe", ExecveArray({"timers", "--timers_test_sleep"}),
+ ExecveArray(), &child_pid, nullptr));
+
+ // Verify that we can't create a timer that would send signals to it.
+ struct sigevent sev = {};
+ sev.sigev_notify = SIGEV_THREAD_ID;
+ sev.sigev_signo = kSigno;
+ sev.sigev_notify_thread_id = child_pid;
+ EXPECT_THAT(TimerCreate(CLOCK_MONOTONIC, sev), PosixErrorIs(EINVAL, _));
+}
+
+TEST(IntervalTimerTest, RealTimeSignalsAreNotDuplicated) {
+ const int kSigno = SIGRTMIN;
+ constexpr int kSigvalue = 42;
+
+ // Block signo so that we can accumulate overruns.
+ sigset_t mask;
+ sigemptyset(&mask);
+ sigaddset(&mask, kSigno);
+ const auto scoped_sigmask = ScopedSignalMask(SIG_BLOCK, mask);
+
+ struct sigevent sev = {};
+ sev.sigev_notify = SIGEV_THREAD_ID;
+ sev.sigev_signo = kSigno;
+ sev.sigev_value.sival_int = kSigvalue;
+ sev.sigev_notify_thread_id = gettid();
+ const auto timer =
+ ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev));
+
+ constexpr absl::Duration kPeriod = absl::Seconds(1);
+ constexpr int kCycles = 3;
+ struct itimerspec its = {};
+ its.it_value = its.it_interval = absl::ToTimespec(kPeriod);
+ ASSERT_NO_ERRNO(timer.Set(0, its));
+ absl::SleepFor(kPeriod * kCycles + kTimerSlack);
+
+ // Stop the timer so that no further signals are enqueued after sigtimedwait.
+ struct timespec zero_ts = absl::ToTimespec(absl::ZeroDuration());
+ its.it_value = its.it_interval = zero_ts;
+ ASSERT_NO_ERRNO(timer.Set(0, its));
+
+ // The timer should have sent only a single signal, even though the kernel
+ // supports enqueueing of multiple RT signals.
+ siginfo_t si;
+ ASSERT_THAT(sigtimedwait(&mask, &si, &zero_ts),
+ SyscallSucceedsWithValue(kSigno));
+ EXPECT_EQ(si.si_signo, kSigno);
+ EXPECT_EQ(si.si_code, SI_TIMER);
+ EXPECT_EQ(si.si_timerid, timer.get());
+ // si_overrun was reset by timer_settime.
+ EXPECT_EQ(si.si_overrun, 0);
+ EXPECT_EQ(si.si_int, kSigvalue);
+ EXPECT_THAT(sigtimedwait(&mask, &si, &zero_ts),
+ SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST(IntervalTimerTest, AlreadyPendingSignal) {
+ constexpr int kSigno = SIGUSR1;
+ constexpr int kSigvalue = 42;
+
+ // Block kSigno so that we can accumulate overruns.
+ sigset_t mask;
+ sigemptyset(&mask);
+ sigaddset(&mask, kSigno);
+ const auto scoped_sigmask =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, mask));
+
+ // Send ourselves a signal, preventing the timer from enqueuing.
+ ASSERT_THAT(tgkill(getpid(), gettid(), kSigno), SyscallSucceeds());
+
+ struct sigevent sev = {};
+ sev.sigev_notify = SIGEV_THREAD_ID;
+ sev.sigev_signo = kSigno;
+ sev.sigev_value.sival_int = kSigvalue;
+ sev.sigev_notify_thread_id = gettid();
+ auto timer = ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev));
+
+ constexpr absl::Duration kPeriod = absl::Seconds(1);
+ constexpr int kCycles = 3;
+ struct itimerspec its = {};
+ its.it_value = its.it_interval = absl::ToTimespec(kPeriod);
+ ASSERT_NO_ERRNO(timer.Set(0, its));
+
+ // End the sleep one cycle short; we will sleep for one more cycle below.
+ absl::SleepFor(kPeriod * (kCycles - 1));
+
+ // Dequeue the first signal, which we sent to ourselves with tgkill.
+ siginfo_t si;
+ struct timespec zero_ts = absl::ToTimespec(absl::ZeroDuration());
+ ASSERT_THAT(sigtimedwait(&mask, &si, &zero_ts),
+ SyscallSucceedsWithValue(kSigno));
+ EXPECT_EQ(si.si_signo, kSigno);
+ // glibc sigtimedwait silently replaces SI_TKILL with SI_USER:
+ // sysdeps/unix/sysv/linux/sigtimedwait.c:__sigtimedwait(). This isn't
+ // documented, so we don't depend on it.
+ EXPECT_THAT(si.si_code, AnyOf(SI_USER, SI_TKILL));
+
+ // Sleep for 1 more cycle to give the timer time to send a signal.
+ absl::SleepFor(kPeriod + kTimerSlack);
+
+ // At least kCycles expirations should have occurred, resulting in kCycles-1
+ // overruns (the last expiration sent the signal successfully).
+ ASSERT_THAT(sigtimedwait(&mask, &si, &zero_ts),
+ SyscallSucceedsWithValue(kSigno));
+ EXPECT_EQ(si.si_signo, kSigno);
+ EXPECT_EQ(si.si_code, SI_TIMER);
+ EXPECT_EQ(si.si_timerid, timer.get());
+ EXPECT_GE(si.si_overrun, kCycles - 1);
+ EXPECT_EQ(si.si_int, kSigvalue);
+
+ // Kill the timer, then drain any additional signal it may have enqueued. We
+ // can't do this before the preceding sigtimedwait because stopping or
+ // deleting the timer resets si_overrun to 0.
+ timer.reset();
+ sigtimedwait(&mask, &si, &zero_ts);
+}
+
+TEST(IntervalTimerTest, IgnoredSignalCountsAsOverrun) {
+ constexpr int kSigno = SIGUSR1;
+ constexpr int kSigvalue = 42;
+
+ // Ignore kSigno.
+ struct sigaction sa = {};
+ sa.sa_handler = SIG_IGN;
+ const auto scoped_sigaction =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(kSigno, sa));
+
+ // Unblock kSigno so that ignored signals will be discarded.
+ sigset_t mask;
+ sigemptyset(&mask);
+ sigaddset(&mask, kSigno);
+ auto scoped_sigmask =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, mask));
+
+ struct sigevent sev = {};
+ sev.sigev_notify = SIGEV_THREAD_ID;
+ sev.sigev_signo = kSigno;
+ sev.sigev_value.sival_int = kSigvalue;
+ sev.sigev_notify_thread_id = gettid();
+ auto timer = ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev));
+
+ constexpr absl::Duration kPeriod = absl::Seconds(1);
+ constexpr int kCycles = 3;
+ struct itimerspec its = {};
+ its.it_value = its.it_interval = absl::ToTimespec(kPeriod);
+ ASSERT_NO_ERRNO(timer.Set(0, its));
+
+ // End the sleep one cycle short; we will sleep for one more cycle below.
+ absl::SleepFor(kPeriod * (kCycles - 1));
+
+ // Block kSigno so that ignored signals will be enqueued.
+ scoped_sigmask.Release()();
+ scoped_sigmask = ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, mask));
+
+ // Sleep for 1 more cycle to give the timer time to send a signal.
+ absl::SleepFor(kPeriod + kTimerSlack);
+
+ // At least kCycles expirations should have occurred, resulting in kCycles-1
+ // overruns (the last expiration sent the signal successfully).
+ siginfo_t si;
+ struct timespec zero_ts = absl::ToTimespec(absl::ZeroDuration());
+ ASSERT_THAT(sigtimedwait(&mask, &si, &zero_ts),
+ SyscallSucceedsWithValue(kSigno));
+ EXPECT_EQ(si.si_signo, kSigno);
+ EXPECT_EQ(si.si_code, SI_TIMER);
+ EXPECT_EQ(si.si_timerid, timer.get());
+ EXPECT_GE(si.si_overrun, kCycles - 1);
+ EXPECT_EQ(si.si_int, kSigvalue);
+
+ // Kill the timer, then drain any additional signal it may have enqueued. We
+ // can't do this before the preceding sigtimedwait because stopping or
+ // deleting the timer resets si_overrun to 0.
+ timer.reset();
+ sigtimedwait(&mask, &si, &zero_ts);
+}
+
+} // namespace
+} // namespace testing
+} // namespace gvisor
+
+int main(int argc, char** argv) {
+ gvisor::testing::TestInit(&argc, &argv);
+
+ if (FLAGS_timers_test_sleep) {
+ while (true) {
+ absl::SleepFor(absl::Seconds(10));
+ }
+ }
+
+ return RUN_ALL_TESTS();
+}
diff --git a/test/syscalls/linux/tkill.cc b/test/syscalls/linux/tkill.cc
new file mode 100644
index 000000000..9842ccc9b
--- /dev/null
+++ b/test/syscalls/linux/tkill.cc
@@ -0,0 +1,75 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <cerrno>
+#include <csignal>
+
+#include "gtest/gtest.h"
+#include "test/util/logging.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+static int tkill(pid_t tid, int sig) {
+ int ret;
+ do {
+ // NOTE: tkill(2) could return EAGAIN for RT signals.
+ ret = syscall(SYS_tkill, tid, sig);
+ } while (ret == -1 && errno == EAGAIN);
+ return ret;
+}
+
+TEST(TkillTest, InvalidTID) {
+ EXPECT_THAT(tkill(-1, 0), SyscallFailsWithErrno(EINVAL));
+ EXPECT_THAT(tkill(0, 0), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(TkillTest, ValidTID) {
+ EXPECT_THAT(tkill(gettid(), 0), SyscallSucceeds());
+}
+
+void SigHandler(int sig, siginfo_t* info, void* context) {
+ TEST_CHECK(sig == SIGRTMAX);
+ TEST_CHECK(info->si_pid == getpid());
+ TEST_CHECK(info->si_uid == getuid());
+ TEST_CHECK(info->si_code == SI_TKILL);
+}
+
+// Test with a real signal.
+TEST(TkillTest, ValidTIDAndRealSignal) {
+ struct sigaction sa;
+ sa.sa_sigaction = SigHandler;
+ sigfillset(&sa.sa_mask);
+ sa.sa_flags = SA_SIGINFO;
+ ASSERT_THAT(sigaction(SIGRTMAX, &sa, nullptr), SyscallSucceeds());
+ // InitGoogle blocks all RT signals, so we need undo it.
+ sigset_t unblock;
+ sigemptyset(&unblock);
+ sigaddset(&unblock, SIGRTMAX);
+ ASSERT_THAT(sigprocmask(SIG_UNBLOCK, &unblock, nullptr), SyscallSucceeds());
+ EXPECT_THAT(tkill(gettid(), SIGRTMAX), SyscallSucceeds());
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/truncate.cc b/test/syscalls/linux/truncate.cc
new file mode 100644
index 000000000..2616a9147
--- /dev/null
+++ b/test/syscalls/linux/truncate.cc
@@ -0,0 +1,217 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <signal.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <sys/vfs.h>
+#include <time.h>
+#include <unistd.h>
+#include <iostream>
+#include <string>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/strings/string_view.h"
+#include "test/syscalls/linux/file_base.h"
+#include "test/util/capability_util.h"
+#include "test/util/cleanup.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+class FixtureTruncateTest : public FileTest {
+ void SetUp() override { FileTest::SetUp(); }
+};
+
+TEST_F(FixtureTruncateTest, Truncate) {
+ // Get the current rlimit and restore after test run.
+ struct rlimit initial_lim;
+ ASSERT_THAT(getrlimit(RLIMIT_FSIZE, &initial_lim), SyscallSucceeds());
+ auto cleanup = Cleanup([&initial_lim] {
+ EXPECT_THAT(setrlimit(RLIMIT_FSIZE, &initial_lim), SyscallSucceeds());
+ });
+
+ // Check that it starts at size zero.
+ struct stat buf;
+ ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds());
+ EXPECT_EQ(buf.st_size, 0);
+
+ // Stay at size zero.
+ EXPECT_THAT(truncate(test_file_name_.c_str(), 0), SyscallSucceeds());
+ ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds());
+ EXPECT_EQ(buf.st_size, 0);
+
+ // Grow to ten bytes.
+ EXPECT_THAT(truncate(test_file_name_.c_str(), 10), SyscallSucceeds());
+ ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds());
+ EXPECT_EQ(buf.st_size, 10);
+
+ // Can't be truncated to a negative number.
+ EXPECT_THAT(truncate(test_file_name_.c_str(), -1),
+ SyscallFailsWithErrno(EINVAL));
+
+ // Try growing past the file size limit.
+ sigset_t new_mask;
+ sigemptyset(&new_mask);
+ sigaddset(&new_mask, SIGXFSZ);
+ sigprocmask(SIG_BLOCK, &new_mask, nullptr);
+ struct timespec timelimit;
+ timelimit.tv_sec = 10;
+ timelimit.tv_nsec = 0;
+
+ struct rlimit setlim;
+ setlim.rlim_cur = 1024;
+ setlim.rlim_max = RLIM_INFINITY;
+ ASSERT_THAT(setrlimit(RLIMIT_FSIZE, &setlim), SyscallSucceeds());
+ EXPECT_THAT(truncate(test_file_name_.c_str(), 1025),
+ SyscallFailsWithErrno(EFBIG));
+ EXPECT_EQ(sigtimedwait(&new_mask, nullptr, &timelimit), SIGXFSZ);
+ ASSERT_THAT(sigprocmask(SIG_UNBLOCK, &new_mask, nullptr), SyscallSucceeds());
+
+ // Shrink back down to zero.
+ EXPECT_THAT(truncate(test_file_name_.c_str(), 0), SyscallSucceeds());
+ ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds());
+ EXPECT_EQ(buf.st_size, 0);
+}
+
+TEST_F(FixtureTruncateTest, Ftruncate) {
+ // Get the current rlimit and restore after test run.
+ struct rlimit initial_lim;
+ ASSERT_THAT(getrlimit(RLIMIT_FSIZE, &initial_lim), SyscallSucceeds());
+ auto cleanup = Cleanup([&initial_lim] {
+ EXPECT_THAT(setrlimit(RLIMIT_FSIZE, &initial_lim), SyscallSucceeds());
+ });
+
+ // Check that it starts at size zero.
+ struct stat buf;
+ ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds());
+ EXPECT_EQ(buf.st_size, 0);
+
+ // Stay at size zero.
+ EXPECT_THAT(ftruncate(test_file_fd_.get(), 0), SyscallSucceeds());
+ ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds());
+ EXPECT_EQ(buf.st_size, 0);
+
+ // Grow to ten bytes.
+ EXPECT_THAT(ftruncate(test_file_fd_.get(), 10), SyscallSucceeds());
+ ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds());
+ EXPECT_EQ(buf.st_size, 10);
+
+ // Can't be truncated to a negative number.
+ EXPECT_THAT(ftruncate(test_file_fd_.get(), -1),
+ SyscallFailsWithErrno(EINVAL));
+
+ // Try growing past the file size limit.
+ sigset_t new_mask;
+ sigemptyset(&new_mask);
+ sigaddset(&new_mask, SIGXFSZ);
+ sigprocmask(SIG_BLOCK, &new_mask, nullptr);
+ struct timespec timelimit;
+ timelimit.tv_sec = 10;
+ timelimit.tv_nsec = 0;
+
+ struct rlimit setlim;
+ setlim.rlim_cur = 1024;
+ setlim.rlim_max = RLIM_INFINITY;
+ ASSERT_THAT(setrlimit(RLIMIT_FSIZE, &setlim), SyscallSucceeds());
+ EXPECT_THAT(ftruncate(test_file_fd_.get(), 1025),
+ SyscallFailsWithErrno(EFBIG));
+ EXPECT_EQ(sigtimedwait(&new_mask, nullptr, &timelimit), SIGXFSZ);
+ ASSERT_THAT(sigprocmask(SIG_UNBLOCK, &new_mask, nullptr), SyscallSucceeds());
+
+ // Shrink back down to zero.
+ EXPECT_THAT(ftruncate(test_file_fd_.get(), 0), SyscallSucceeds());
+ ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds());
+ EXPECT_EQ(buf.st_size, 0);
+}
+
+// Truncating a file down clears that portion of the file.
+TEST_F(FixtureTruncateTest, FtruncateShrinkGrow) {
+ std::vector<char> buf(10, 'a');
+ EXPECT_THAT(WriteFd(test_file_fd_.get(), buf.data(), buf.size()),
+ SyscallSucceedsWithValue(buf.size()));
+
+ // Shrink then regrow the file. This should clear the second half of the file.
+ EXPECT_THAT(ftruncate(test_file_fd_.get(), 5), SyscallSucceeds());
+ EXPECT_THAT(ftruncate(test_file_fd_.get(), 10), SyscallSucceeds());
+
+ EXPECT_THAT(lseek(test_file_fd_.get(), 0, SEEK_SET), SyscallSucceeds());
+
+ std::vector<char> buf2(10);
+ EXPECT_THAT(ReadFd(test_file_fd_.get(), buf2.data(), buf2.size()),
+ SyscallSucceedsWithValue(buf2.size()));
+
+ std::vector<char> expect = {'a', 'a', 'a', 'a', 'a',
+ '\0', '\0', '\0', '\0', '\0'};
+ EXPECT_EQ(expect, buf2);
+}
+
+TEST(TruncateTest, TruncateDir) {
+ auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ EXPECT_THAT(truncate(temp_dir.path().c_str(), 0),
+ SyscallFailsWithErrno(EISDIR));
+}
+
+TEST(TruncateTest, FtruncateDir) {
+ auto temp_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(temp_dir.path(), O_DIRECTORY | O_RDONLY));
+ EXPECT_THAT(ftruncate(fd.get(), 0), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(TruncateTest, TruncateNonWriteable) {
+ // Make sure we don't have CAP_DAC_OVERRIDE, since that allows the user to
+ // always override write permissions.
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+ auto temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), absl::string_view(), 0555 /* mode */));
+ EXPECT_THAT(truncate(temp_file.path().c_str(), 0),
+ SyscallFailsWithErrno(EACCES));
+}
+
+TEST(TruncateTest, FtruncateNonWriteable) {
+ auto temp_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), absl::string_view(), 0555 /* mode */));
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(temp_file.path(), O_RDONLY));
+ EXPECT_THAT(ftruncate(fd.get(), 0), SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(TruncateTest, TruncateNonExist) {
+ EXPECT_THAT(truncate("/foo/bar", 0), SyscallFailsWithErrno(ENOENT));
+}
+
+TEST(TruncateTest, FtruncateVirtualTmp_NoRandomSave) {
+ auto temp_file = NewTempAbsPathInDir("/dev/shm");
+ const DisableSave ds; // Incompatible permissions.
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(temp_file, O_RDWR | O_CREAT | O_EXCL, 0));
+ EXPECT_THAT(ftruncate(fd.get(), 100), SyscallSucceeds());
+}
+
+// NOTE: There are additional truncate(2)/ftruncate(2) tests in mknod.cc
+// which are there to avoid running the tests on a number of different
+// filesystems which may not support mknod.
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/udp_bind.cc b/test/syscalls/linux/udp_bind.cc
new file mode 100644
index 000000000..419aaac76
--- /dev/null
+++ b/test/syscalls/linux/udp_bind.cc
@@ -0,0 +1,316 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <arpa/inet.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include "gtest/gtest.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+struct sockaddr_in_common {
+ sa_family_t sin_family;
+ in_port_t sin_port;
+};
+
+struct SendtoTestParam {
+ // Human readable description of test parameter.
+ std::string description;
+
+ // Test is broken in gVisor, skip.
+ bool skip_on_gvisor;
+
+ // Domain for the socket that will do the sending.
+ int send_domain;
+
+ // Address to bind for the socket that will do the sending.
+ struct sockaddr_storage send_addr;
+ socklen_t send_addr_len; // 0 for unbound.
+
+ // Address to connect to for the socket that will do the sending.
+ struct sockaddr_storage connect_addr;
+ socklen_t connect_addr_len; // 0 for no connection.
+
+ // Domain for the socket that will do the receiving.
+ int recv_domain;
+
+ // Address to bind for the socket that will do the receiving.
+ struct sockaddr_storage recv_addr;
+ socklen_t recv_addr_len;
+
+ // Address to send to.
+ struct sockaddr_storage sendto_addr;
+ socklen_t sendto_addr_len;
+
+ // Expected errno for the sendto call.
+ std::vector<int> sendto_errnos; // empty on success.
+};
+
+class SendtoTest : public ::testing::TestWithParam<SendtoTestParam> {
+ protected:
+ SendtoTest() {
+ // gUnit uses printf, so so will we.
+ printf("Testing with %s\n", GetParam().description.c_str());
+ }
+};
+
+TEST_P(SendtoTest, Sendto) {
+ auto param = GetParam();
+
+ SKIP_IF(param.skip_on_gvisor && IsRunningOnGvisor());
+
+ const FileDescriptor s1 =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(param.send_domain, SOCK_DGRAM, 0));
+ const FileDescriptor s2 =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(param.recv_domain, SOCK_DGRAM, 0));
+
+ if (param.send_addr_len > 0) {
+ ASSERT_THAT(bind(s1.get(), reinterpret_cast<sockaddr*>(&param.send_addr),
+ param.send_addr_len),
+ SyscallSucceeds());
+ }
+
+ if (param.connect_addr_len > 0) {
+ ASSERT_THAT(
+ connect(s1.get(), reinterpret_cast<sockaddr*>(&param.connect_addr),
+ param.connect_addr_len),
+ SyscallSucceeds());
+ }
+
+ ASSERT_THAT(bind(s2.get(), reinterpret_cast<sockaddr*>(&param.recv_addr),
+ param.recv_addr_len),
+ SyscallSucceeds());
+
+ struct sockaddr_storage real_recv_addr = {};
+ socklen_t real_recv_addr_len = param.recv_addr_len;
+ ASSERT_THAT(
+ getsockname(s2.get(), reinterpret_cast<sockaddr*>(&real_recv_addr),
+ &real_recv_addr_len),
+ SyscallSucceeds());
+
+ ASSERT_EQ(real_recv_addr_len, param.recv_addr_len);
+
+ int recv_port =
+ reinterpret_cast<sockaddr_in_common*>(&real_recv_addr)->sin_port;
+
+ struct sockaddr_storage sendto_addr = param.sendto_addr;
+ reinterpret_cast<sockaddr_in_common*>(&sendto_addr)->sin_port = recv_port;
+
+ char buf[20] = {};
+ if (!param.sendto_errnos.empty()) {
+ ASSERT_THAT(RetryEINTR(sendto)(s1.get(), buf, sizeof(buf), 0,
+ reinterpret_cast<sockaddr*>(&sendto_addr),
+ param.sendto_addr_len),
+ SyscallFailsWithErrno(ElementOf(param.sendto_errnos)));
+ return;
+ }
+
+ ASSERT_THAT(RetryEINTR(sendto)(s1.get(), buf, sizeof(buf), 0,
+ reinterpret_cast<sockaddr*>(&sendto_addr),
+ param.sendto_addr_len),
+ SyscallSucceedsWithValue(sizeof(buf)));
+
+ struct sockaddr_storage got_addr = {};
+ socklen_t got_addr_len = sizeof(sockaddr_storage);
+ ASSERT_THAT(RetryEINTR(recvfrom)(s2.get(), buf, sizeof(buf), 0,
+ reinterpret_cast<sockaddr*>(&got_addr),
+ &got_addr_len),
+ SyscallSucceedsWithValue(sizeof(buf)));
+
+ ASSERT_GT(got_addr_len, sizeof(sockaddr_in_common));
+ int got_port = reinterpret_cast<sockaddr_in_common*>(&got_addr)->sin_port;
+
+ struct sockaddr_storage sender_addr = {};
+ socklen_t sender_addr_len = sizeof(sockaddr_storage);
+ ASSERT_THAT(getsockname(s1.get(), reinterpret_cast<sockaddr*>(&sender_addr),
+ &sender_addr_len),
+ SyscallSucceeds());
+
+ ASSERT_GT(sender_addr_len, sizeof(sockaddr_in_common));
+ int sender_port =
+ reinterpret_cast<sockaddr_in_common*>(&sender_addr)->sin_port;
+
+ EXPECT_EQ(got_port, sender_port);
+}
+
+socklen_t Ipv4Addr(sockaddr_storage* addr, int port = 0) {
+ auto addr4 = reinterpret_cast<sockaddr_in*>(addr);
+ addr4->sin_family = AF_INET;
+ addr4->sin_port = port;
+ inet_pton(AF_INET, "127.0.0.1", &addr4->sin_addr.s_addr);
+ return sizeof(struct sockaddr_in);
+}
+
+socklen_t Ipv6Addr(sockaddr_storage* addr, int port = 0) {
+ auto addr6 = reinterpret_cast<sockaddr_in6*>(addr);
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_port = port;
+ inet_pton(AF_INET6, "::1", &addr6->sin6_addr.s6_addr);
+ return sizeof(struct sockaddr_in6);
+}
+
+socklen_t Ipv4MappedIpv6Addr(sockaddr_storage* addr, int port = 0) {
+ auto addr6 = reinterpret_cast<sockaddr_in6*>(addr);
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_port = port;
+ inet_pton(AF_INET6, "::ffff:127.0.0.1", &addr6->sin6_addr.s6_addr);
+ return sizeof(struct sockaddr_in6);
+}
+
+INSTANTIATE_TEST_CASE_P(
+ UdpBindTest, SendtoTest,
+ ::testing::Values(
+ []() {
+ SendtoTestParam param = {};
+ param.description = "IPv4 mapped IPv6 sendto IPv4 mapped IPv6";
+ param.send_domain = AF_INET6;
+ param.send_addr_len = Ipv4MappedIpv6Addr(&param.send_addr);
+ param.recv_domain = AF_INET6;
+ param.recv_addr_len = Ipv4MappedIpv6Addr(&param.recv_addr);
+ param.sendto_addr_len = Ipv4MappedIpv6Addr(&param.sendto_addr);
+ return param;
+ }(),
+ []() {
+ SendtoTestParam param = {};
+ param.description = "IPv6 sendto IPv6";
+ param.send_domain = AF_INET6;
+ param.send_addr_len = Ipv6Addr(&param.send_addr);
+ param.recv_domain = AF_INET6;
+ param.recv_addr_len = Ipv6Addr(&param.recv_addr);
+ param.sendto_addr_len = Ipv6Addr(&param.sendto_addr);
+ return param;
+ }(),
+ []() {
+ SendtoTestParam param = {};
+ param.description = "IPv4 sendto IPv4";
+ param.send_domain = AF_INET;
+ param.send_addr_len = Ipv4Addr(&param.send_addr);
+ param.recv_domain = AF_INET;
+ param.recv_addr_len = Ipv4Addr(&param.recv_addr);
+ param.sendto_addr_len = Ipv4Addr(&param.sendto_addr);
+ return param;
+ }(),
+ []() {
+ SendtoTestParam param = {};
+ param.description = "IPv4 mapped IPv6 sendto IPv4";
+ param.send_domain = AF_INET6;
+ param.send_addr_len = Ipv4MappedIpv6Addr(&param.send_addr);
+ param.recv_domain = AF_INET;
+ param.recv_addr_len = Ipv4Addr(&param.recv_addr);
+ param.sendto_addr_len = Ipv4MappedIpv6Addr(&param.sendto_addr);
+ return param;
+ }(),
+ []() {
+ SendtoTestParam param = {};
+ param.description = "IPv4 sendto IPv4 mapped IPv6";
+ param.send_domain = AF_INET;
+ param.send_addr_len = Ipv4Addr(&param.send_addr);
+ param.recv_domain = AF_INET6;
+ param.recv_addr_len = Ipv4MappedIpv6Addr(&param.recv_addr);
+ param.sendto_addr_len = Ipv4Addr(&param.sendto_addr);
+ return param;
+ }(),
+ []() {
+ SendtoTestParam param = {};
+ param.description = "unbound IPv6 sendto IPv4 mapped IPv6";
+ param.send_domain = AF_INET6;
+ param.recv_domain = AF_INET6;
+ param.recv_addr_len = Ipv4MappedIpv6Addr(&param.recv_addr);
+ param.sendto_addr_len = Ipv4MappedIpv6Addr(&param.sendto_addr);
+ return param;
+ }(),
+ []() {
+ SendtoTestParam param = {};
+ param.description = "unbound IPv6 sendto IPv4";
+ param.send_domain = AF_INET6;
+ param.recv_domain = AF_INET;
+ param.recv_addr_len = Ipv4Addr(&param.recv_addr);
+ param.sendto_addr_len = Ipv4MappedIpv6Addr(&param.sendto_addr);
+ return param;
+ }(),
+ []() {
+ SendtoTestParam param = {};
+ param.description = "IPv6 sendto IPv4";
+ param.send_domain = AF_INET6;
+ param.send_addr_len = Ipv6Addr(&param.send_addr);
+ param.recv_domain = AF_INET;
+ param.recv_addr_len = Ipv4Addr(&param.recv_addr);
+ param.sendto_addr_len = Ipv4MappedIpv6Addr(&param.sendto_addr);
+ param.sendto_errnos = {ENETUNREACH};
+ return param;
+ }(),
+ []() {
+ SendtoTestParam param = {};
+ param.description = "IPv4 mapped IPv6 sendto IPv6";
+ param.send_domain = AF_INET6;
+ param.send_addr_len = Ipv4MappedIpv6Addr(&param.send_addr);
+ param.recv_domain = AF_INET6;
+ param.recv_addr_len = Ipv6Addr(&param.recv_addr);
+ param.sendto_addr_len = Ipv6Addr(&param.sendto_addr);
+ param.sendto_errnos = {EAFNOSUPPORT};
+ // The errno returned changed in Linux commit c8e6ad0829a723.
+ param.sendto_errnos = {EINVAL, EAFNOSUPPORT};
+ return param;
+ }(),
+ []() {
+ SendtoTestParam param = {};
+ param.description = "connected IPv4 mapped IPv6 sendto IPv6";
+ param.send_domain = AF_INET6;
+ param.connect_addr_len =
+ Ipv4MappedIpv6Addr(&param.connect_addr, 5000);
+ param.recv_domain = AF_INET6;
+ param.recv_addr_len = Ipv6Addr(&param.recv_addr);
+ param.sendto_addr_len = Ipv6Addr(&param.sendto_addr);
+ // The errno returned changed in Linux commit c8e6ad0829a723.
+ param.sendto_errnos = {EINVAL, EAFNOSUPPORT};
+ return param;
+ }(),
+ []() {
+ SendtoTestParam param = {};
+ param.description = "connected IPv6 sendto IPv4 mapped IPv6";
+ // TODO: Determine if this inconsistent behavior is worth
+ // implementing.
+ param.skip_on_gvisor = true;
+ param.send_domain = AF_INET6;
+ param.connect_addr_len = Ipv6Addr(&param.connect_addr, 5000);
+ param.recv_domain = AF_INET6;
+ param.recv_addr_len = Ipv4MappedIpv6Addr(&param.recv_addr);
+ param.sendto_addr_len = Ipv4MappedIpv6Addr(&param.sendto_addr);
+ return param;
+ }(),
+ []() {
+ SendtoTestParam param = {};
+ param.description = "connected IPv6 sendto IPv4";
+ // TODO: Determine if this inconsistent behavior is worth
+ // implementing.
+ param.skip_on_gvisor = true;
+ param.send_domain = AF_INET6;
+ param.connect_addr_len = Ipv6Addr(&param.connect_addr, 5000);
+ param.recv_domain = AF_INET;
+ param.recv_addr_len = Ipv4Addr(&param.recv_addr);
+ param.sendto_addr_len = Ipv4MappedIpv6Addr(&param.sendto_addr);
+ return param;
+ }()));
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/udp_socket.cc b/test/syscalls/linux/udp_socket.cc
new file mode 100644
index 000000000..a02b418a3
--- /dev/null
+++ b/test/syscalls/linux/udp_socket.cc
@@ -0,0 +1,941 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <linux/errqueue.h>
+#include <netinet/in.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include "gtest/gtest.h"
+#include "absl/base/macros.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// The initial port to be be used on gvisor.
+constexpr int TestPort = 40000;
+
+// Fixture for tests parameterized by the address family to use (AF_INET and
+// AF_INET6) when creating sockets.
+class UdpSocketTest : public ::testing::TestWithParam<int> {
+ protected:
+ // Creates two sockets that will be used by test cases.
+ void SetUp() override;
+
+ // Closes the sockets created by SetUp().
+ void TearDown() override {
+ EXPECT_THAT(close(s_), SyscallSucceeds());
+ EXPECT_THAT(close(t_), SyscallSucceeds());
+
+ for (size_t i = 0; i < ABSL_ARRAYSIZE(ports_); ++i) {
+ ASSERT_NO_ERRNO(FreeAvailablePort(ports_[i]));
+ }
+ }
+
+ // First UDP socket.
+ int s_;
+
+ // Second UDP socket.
+ int t_;
+
+ // The length of the socket address.
+ socklen_t addrlen_;
+
+ // Initialized address pointing to loopback and port TestPort+i.
+ struct sockaddr* addr_[3];
+
+ // Initialize "any" address.
+ struct sockaddr* anyaddr_;
+
+ // Used ports.
+ int ports_[3];
+
+ private:
+ // Storage for the loopback addresses.
+ struct sockaddr_storage addr_storage_[3];
+
+ // Storage for the "any" address.
+ struct sockaddr_storage anyaddr_storage_;
+};
+
+// Gets a pointer to the port component of the given address.
+uint16_t* Port(struct sockaddr_storage* addr) {
+ switch (addr->ss_family) {
+ case AF_INET: {
+ auto sin = reinterpret_cast<struct sockaddr_in*>(addr);
+ return &sin->sin_port;
+ }
+ case AF_INET6: {
+ auto sin6 = reinterpret_cast<struct sockaddr_in6*>(addr);
+ return &sin6->sin6_port;
+ }
+ }
+
+ return nullptr;
+}
+
+void UdpSocketTest::SetUp() {
+ ASSERT_THAT(s_ = socket(GetParam(), SOCK_DGRAM, IPPROTO_UDP),
+ SyscallSucceeds());
+
+ ASSERT_THAT(t_ = socket(GetParam(), SOCK_DGRAM, IPPROTO_UDP),
+ SyscallSucceeds());
+
+ memset(&anyaddr_storage_, 0, sizeof(anyaddr_storage_));
+ anyaddr_ = reinterpret_cast<struct sockaddr*>(&anyaddr_storage_);
+ anyaddr_->sa_family = GetParam();
+
+ // Initialize address-family-specific values.
+ switch (GetParam()) {
+ case AF_INET: {
+ auto sin = reinterpret_cast<struct sockaddr_in*>(&anyaddr_storage_);
+ addrlen_ = sizeof(*sin);
+ sin->sin_addr.s_addr = htonl(INADDR_ANY);
+ break;
+ }
+ case AF_INET6: {
+ auto sin6 = reinterpret_cast<struct sockaddr_in6*>(&anyaddr_storage_);
+ addrlen_ = sizeof(*sin6);
+ sin6->sin6_addr = in6addr_any;
+ break;
+ }
+ }
+
+ if (gvisor::testing::IsRunningOnGvisor()) {
+ for (size_t i = 0; i < ABSL_ARRAYSIZE(ports_); ++i) {
+ ports_[i] = TestPort + i;
+ }
+ } else {
+ // When not under gvisor, use utility function to pick port. Assert that
+ // all ports are different.
+ std::string error;
+ for (size_t i = 0; i < ABSL_ARRAYSIZE(ports_); ++i) {
+ // Find an unused port, we specify port 0 to allow the kernel to provide
+ // the port.
+ bool unique = true;
+ do {
+ ports_[i] = ASSERT_NO_ERRNO_AND_VALUE(PortAvailable(
+ 0, AddressFamily::kDualStack, SocketType::kUdp, false));
+ ASSERT_GT(ports_[i], 0);
+ for (size_t j = 0; j < i; ++j) {
+ if (ports_[j] == ports_[i]) {
+ unique = false;
+ break;
+ }
+ }
+ } while (!unique);
+ }
+ }
+
+ // Initialize the sockaddrs.
+ for (size_t i = 0; i < ABSL_ARRAYSIZE(addr_); ++i) {
+ memset(&addr_storage_[i], 0, sizeof(addr_storage_[i]));
+
+ addr_[i] = reinterpret_cast<struct sockaddr*>(&addr_storage_[i]);
+ addr_[i]->sa_family = GetParam();
+
+ switch (GetParam()) {
+ case AF_INET: {
+ auto sin = reinterpret_cast<struct sockaddr_in*>(addr_[i]);
+ sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ sin->sin_port = htons(ports_[i]);
+ break;
+ }
+ case AF_INET6: {
+ auto sin6 = reinterpret_cast<struct sockaddr_in6*>(addr_[i]);
+ sin6->sin6_addr = in6addr_loopback;
+ sin6->sin6_port = htons(ports_[i]);
+ break;
+ }
+ }
+ }
+}
+
+TEST_P(UdpSocketTest, Creation) {
+ int s_;
+
+ ASSERT_THAT(s_ = socket(GetParam(), SOCK_DGRAM, IPPROTO_UDP),
+ SyscallSucceeds());
+ EXPECT_THAT(close(s_), SyscallSucceeds());
+
+ ASSERT_THAT(s_ = socket(GetParam(), SOCK_DGRAM, 0), SyscallSucceeds());
+ EXPECT_THAT(close(s_), SyscallSucceeds());
+
+ ASSERT_THAT(s_ = socket(GetParam(), SOCK_STREAM, IPPROTO_UDP),
+ SyscallFails());
+}
+
+TEST_P(UdpSocketTest, Getsockname) {
+ // Check that we're not bound.
+ struct sockaddr_storage addr;
+ socklen_t addrlen = sizeof(addr);
+ EXPECT_THAT(getsockname(s_, reinterpret_cast<sockaddr*>(&addr), &addrlen),
+ SyscallSucceeds());
+ EXPECT_EQ(addrlen, addrlen_);
+ EXPECT_EQ(memcmp(&addr, anyaddr_, addrlen_), 0);
+
+ // Bind, then check that we get the right address.
+ ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds());
+
+ addrlen = sizeof(addr);
+ EXPECT_THAT(getsockname(s_, reinterpret_cast<sockaddr*>(&addr), &addrlen),
+ SyscallSucceeds());
+ EXPECT_EQ(addrlen, addrlen_);
+ EXPECT_EQ(memcmp(&addr, addr_[0], addrlen_), 0);
+}
+
+TEST_P(UdpSocketTest, Getpeername) {
+ // Check that we're not connected.
+ struct sockaddr_storage addr;
+ socklen_t addrlen = sizeof(addr);
+ EXPECT_THAT(getpeername(s_, reinterpret_cast<sockaddr*>(&addr), &addrlen),
+ SyscallFailsWithErrno(ENOTCONN));
+
+ // Connect, then check that we get the right address.
+ ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds());
+
+ addrlen = sizeof(addr);
+ EXPECT_THAT(getpeername(s_, reinterpret_cast<sockaddr*>(&addr), &addrlen),
+ SyscallSucceeds());
+ EXPECT_EQ(addrlen, addrlen_);
+ EXPECT_EQ(memcmp(&addr, addr_[0], addrlen_), 0);
+}
+
+TEST_P(UdpSocketTest, SendNotConnected) {
+ // Do send & write, they must fail.
+ char buf[512];
+ EXPECT_THAT(send(s_, buf, sizeof(buf), 0),
+ SyscallFailsWithErrno(EDESTADDRREQ));
+
+ EXPECT_THAT(write(s_, buf, sizeof(buf)), SyscallFailsWithErrno(EDESTADDRREQ));
+
+ // Use sendto.
+ ASSERT_THAT(sendto(s_, buf, sizeof(buf), 0, addr_[0], addrlen_),
+ SyscallSucceedsWithValue(sizeof(buf)));
+
+ // Check that we're bound now.
+ struct sockaddr_storage addr;
+ socklen_t addrlen = sizeof(addr);
+ EXPECT_THAT(getsockname(s_, reinterpret_cast<sockaddr*>(&addr), &addrlen),
+ SyscallSucceeds());
+ EXPECT_EQ(addrlen, addrlen_);
+ EXPECT_NE(*Port(&addr), 0);
+}
+
+TEST_P(UdpSocketTest, ConnectBinds) {
+ // Connect the socket.
+ ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds());
+
+ // Check that we're bound now.
+ struct sockaddr_storage addr;
+ socklen_t addrlen = sizeof(addr);
+ EXPECT_THAT(getsockname(s_, reinterpret_cast<sockaddr*>(&addr), &addrlen),
+ SyscallSucceeds());
+ EXPECT_EQ(addrlen, addrlen_);
+ EXPECT_NE(*Port(&addr), 0);
+}
+
+TEST_P(UdpSocketTest, ReceiveNotBound) {
+ char buf[512];
+ EXPECT_THAT(recv(s_, buf, sizeof(buf), MSG_DONTWAIT),
+ SyscallFailsWithErrno(EWOULDBLOCK));
+}
+
+TEST_P(UdpSocketTest, Bind) {
+ ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds());
+
+ // Try to bind again.
+ EXPECT_THAT(bind(s_, addr_[1], addrlen_), SyscallFailsWithErrno(EINVAL));
+
+ // Check that we're still bound to the original address.
+ struct sockaddr_storage addr;
+ socklen_t addrlen = sizeof(addr);
+ EXPECT_THAT(getsockname(s_, reinterpret_cast<sockaddr*>(&addr), &addrlen),
+ SyscallSucceeds());
+ EXPECT_EQ(addrlen, addrlen_);
+ EXPECT_EQ(memcmp(&addr, addr_[0], addrlen_), 0);
+}
+
+TEST_P(UdpSocketTest, BindInUse) {
+ ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds());
+
+ // Try to bind again.
+ EXPECT_THAT(bind(t_, addr_[0], addrlen_), SyscallFailsWithErrno(EADDRINUSE));
+}
+
+TEST_P(UdpSocketTest, ReceiveAfterConnect) {
+ // Connect s_ to loopback:TestPort, and bind t_ to loopback:TestPort.
+ ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds());
+ ASSERT_THAT(bind(t_, addr_[0], addrlen_), SyscallSucceeds());
+
+ // Get the address s_ was bound to during connect.
+ struct sockaddr_storage addr;
+ socklen_t addrlen = sizeof(addr);
+ EXPECT_THAT(getsockname(s_, reinterpret_cast<sockaddr*>(&addr), &addrlen),
+ SyscallSucceeds());
+ EXPECT_EQ(addrlen, addrlen_);
+
+ // Send from t_ to s_.
+ char buf[512];
+ RandomizeBuffer(buf, sizeof(buf));
+ ASSERT_THAT(sendto(t_, buf, sizeof(buf), 0,
+ reinterpret_cast<sockaddr*>(&addr), addrlen),
+ SyscallSucceedsWithValue(sizeof(buf)));
+
+ // Receive the data.
+ char received[512];
+ EXPECT_THAT(recv(s_, received, sizeof(received), 0),
+ SyscallSucceedsWithValue(sizeof(received)));
+ EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0);
+}
+
+TEST_P(UdpSocketTest, Connect) {
+ ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds());
+
+ // Check that we're connected to the right peer.
+ struct sockaddr_storage peer;
+ socklen_t peerlen = sizeof(peer);
+ EXPECT_THAT(getpeername(s_, reinterpret_cast<sockaddr*>(&peer), &peerlen),
+ SyscallSucceeds());
+ EXPECT_EQ(peerlen, addrlen_);
+ EXPECT_EQ(memcmp(&peer, addr_[0], addrlen_), 0);
+
+ // Try to bind after connect.
+ EXPECT_THAT(bind(s_, addr_[1], addrlen_), SyscallFailsWithErrno(EINVAL));
+
+ // Try to connect again.
+ EXPECT_THAT(connect(s_, addr_[2], addrlen_), SyscallSucceeds());
+
+ // Check that peer name changed.
+ peerlen = sizeof(peer);
+ EXPECT_THAT(getpeername(s_, reinterpret_cast<sockaddr*>(&peer), &peerlen),
+ SyscallSucceeds());
+ EXPECT_EQ(peerlen, addrlen_);
+ EXPECT_EQ(memcmp(&peer, addr_[2], addrlen_), 0);
+}
+
+TEST_P(UdpSocketTest, SendToAddressOtherThanConnected) {
+ ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds());
+
+ // Send to a different destination than we're connected to.
+ char buf[512];
+ EXPECT_THAT(sendto(s_, buf, sizeof(buf), 0, addr_[1], addrlen_),
+ SyscallSucceedsWithValue(sizeof(buf)));
+}
+
+TEST_P(UdpSocketTest, ZerolengthWriteAllowed) {
+ // Bind s_ to loopback:TestPort, and connect to loopback:TestPort+1.
+ ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds());
+ ASSERT_THAT(connect(s_, addr_[1], addrlen_), SyscallSucceeds());
+
+ // Bind t_ to loopback:TestPort+1.
+ ASSERT_THAT(bind(t_, addr_[1], addrlen_), SyscallSucceeds());
+
+ char buf[3];
+ // Send zero length packet from s_ to t_.
+ ASSERT_THAT(write(s_, buf, 0), SyscallSucceedsWithValue(0));
+ // Receive the packet.
+ char received[3];
+ EXPECT_THAT(read(t_, received, sizeof(received)),
+ SyscallSucceedsWithValue(0));
+}
+
+TEST_P(UdpSocketTest, ZerolengthWriteAllowedNonBlockRead) {
+ // Bind s_ to loopback:TestPort, and connect to loopback:TestPort+1.
+ ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds());
+ ASSERT_THAT(connect(s_, addr_[1], addrlen_), SyscallSucceeds());
+
+ // Bind t_ to loopback:TestPort+1.
+ ASSERT_THAT(bind(t_, addr_[1], addrlen_), SyscallSucceeds());
+
+ // Set t_ to non-blocking.
+ int opts = 0;
+ ASSERT_THAT(opts = fcntl(t_, F_GETFL), SyscallSucceeds());
+ ASSERT_THAT(fcntl(t_, F_SETFL, opts | O_NONBLOCK), SyscallSucceeds());
+
+ char buf[3];
+ // Send zero length packet from s_ to t_.
+ ASSERT_THAT(write(s_, buf, 0), SyscallSucceedsWithValue(0));
+ // Receive the packet.
+ char received[3];
+ EXPECT_THAT(read(t_, received, sizeof(received)),
+ SyscallSucceedsWithValue(0));
+ EXPECT_THAT(read(t_, received, sizeof(received)),
+ SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST_P(UdpSocketTest, SendAndReceiveNotConnected) {
+ // Bind s_ to loopback.
+ ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds());
+
+ // Send some data to s_.
+ char buf[512];
+ RandomizeBuffer(buf, sizeof(buf));
+
+ ASSERT_THAT(sendto(t_, buf, sizeof(buf), 0, addr_[0], addrlen_),
+ SyscallSucceedsWithValue(sizeof(buf)));
+
+ // Receive the data.
+ char received[512];
+ EXPECT_THAT(recv(s_, received, sizeof(received), 0),
+ SyscallSucceedsWithValue(sizeof(received)));
+ EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0);
+}
+
+TEST_P(UdpSocketTest, SendAndReceiveConnected) {
+ // Bind s_ to loopback:TestPort, and connect to loopback:TestPort+1.
+ ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds());
+ ASSERT_THAT(connect(s_, addr_[1], addrlen_), SyscallSucceeds());
+
+ // Bind t_ to loopback:TestPort+1.
+ ASSERT_THAT(bind(t_, addr_[1], addrlen_), SyscallSucceeds());
+
+ // Send some data from t_ to s_.
+ char buf[512];
+ RandomizeBuffer(buf, sizeof(buf));
+
+ ASSERT_THAT(sendto(t_, buf, sizeof(buf), 0, addr_[0], addrlen_),
+ SyscallSucceedsWithValue(sizeof(buf)));
+
+ // Receive the data.
+ char received[512];
+ EXPECT_THAT(recv(s_, received, sizeof(received), 0),
+ SyscallSucceedsWithValue(sizeof(received)));
+ EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0);
+}
+
+TEST_P(UdpSocketTest, ReceiveFromNotConnected) {
+ // Bind s_ to loopback:TestPort, and connect to loopback:TestPort+1.
+ ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds());
+ ASSERT_THAT(connect(s_, addr_[1], addrlen_), SyscallSucceeds());
+
+ // Bind t_ to loopback:TestPort+2.
+ ASSERT_THAT(bind(t_, addr_[2], addrlen_), SyscallSucceeds());
+
+ // Send some data from t_ to s_.
+ char buf[512];
+ ASSERT_THAT(sendto(t_, buf, sizeof(buf), 0, addr_[0], addrlen_),
+ SyscallSucceedsWithValue(sizeof(buf)));
+
+ // Check that the data isn't_ received because it was sent from a different
+ // address than we're connected.
+ EXPECT_THAT(recv(s_, buf, sizeof(buf), MSG_DONTWAIT),
+ SyscallFailsWithErrno(EWOULDBLOCK));
+}
+
+TEST_P(UdpSocketTest, ReceiveBeforeConnect) {
+ // Bind s_ to loopback:TestPort.
+ ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds());
+
+ // Bind t_ to loopback:TestPort+2.
+ ASSERT_THAT(bind(t_, addr_[2], addrlen_), SyscallSucceeds());
+
+ // Send some data from t_ to s_.
+ char buf[512];
+ RandomizeBuffer(buf, sizeof(buf));
+
+ ASSERT_THAT(sendto(t_, buf, sizeof(buf), 0, addr_[0], addrlen_),
+ SyscallSucceedsWithValue(sizeof(buf)));
+
+ // Connect to loopback:TestPort+1.
+ ASSERT_THAT(connect(s_, addr_[1], addrlen_), SyscallSucceeds());
+
+ // Receive the data. It works because it was sent before the connect.
+ char received[512];
+ EXPECT_THAT(recv(s_, received, sizeof(received), 0),
+ SyscallSucceedsWithValue(sizeof(received)));
+ EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0);
+
+ // Send again. This time it should not be received.
+ ASSERT_THAT(sendto(t_, buf, sizeof(buf), 0, addr_[0], addrlen_),
+ SyscallSucceedsWithValue(sizeof(buf)));
+
+ EXPECT_THAT(recv(s_, buf, sizeof(buf), MSG_DONTWAIT),
+ SyscallFailsWithErrno(EWOULDBLOCK));
+}
+
+TEST_P(UdpSocketTest, ReceiveFrom) {
+ // Bind s_ to loopback:TestPort, and connect to loopback:TestPort+1.
+ ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds());
+ ASSERT_THAT(connect(s_, addr_[1], addrlen_), SyscallSucceeds());
+
+ // Bind t_ to loopback:TestPort+1.
+ ASSERT_THAT(bind(t_, addr_[1], addrlen_), SyscallSucceeds());
+
+ // Send some data from t_ to s_.
+ char buf[512];
+ RandomizeBuffer(buf, sizeof(buf));
+
+ ASSERT_THAT(sendto(t_, buf, sizeof(buf), 0, addr_[0], addrlen_),
+ SyscallSucceedsWithValue(sizeof(buf)));
+
+ // Receive the data and sender address.
+ char received[512];
+ struct sockaddr_storage addr;
+ socklen_t addrlen = sizeof(addr);
+ EXPECT_THAT(recvfrom(s_, received, sizeof(received), 0,
+ reinterpret_cast<sockaddr*>(&addr), &addrlen),
+ SyscallSucceedsWithValue(sizeof(received)));
+ EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0);
+ EXPECT_EQ(addrlen, addrlen_);
+ EXPECT_EQ(memcmp(&addr, addr_[1], addrlen_), 0);
+}
+
+TEST_P(UdpSocketTest, Listen) {
+ ASSERT_THAT(listen(s_, SOMAXCONN), SyscallFailsWithErrno(EOPNOTSUPP));
+}
+
+TEST_P(UdpSocketTest, Accept) {
+ ASSERT_THAT(accept(s_, nullptr, nullptr), SyscallFailsWithErrno(EOPNOTSUPP));
+}
+
+// This test validates that a read shutdown with pending data allows the read
+// to proceed with the data before returning EAGAIN.
+TEST_P(UdpSocketTest, ReadShutdownNonblockPendingData) {
+ char received[512];
+
+ // Bind t_ to loopback:TestPort+2.
+ ASSERT_THAT(bind(t_, addr_[2], addrlen_), SyscallSucceeds());
+ ASSERT_THAT(connect(t_, addr_[1], addrlen_), SyscallSucceeds());
+
+ // Connect the socket, then try to shutdown again.
+ ASSERT_THAT(bind(s_, addr_[1], addrlen_), SyscallSucceeds());
+ ASSERT_THAT(connect(s_, addr_[2], addrlen_), SyscallSucceeds());
+
+ // Verify that we get EWOULDBLOCK when there is nothing to read.
+ EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT),
+ SyscallFailsWithErrno(EWOULDBLOCK));
+
+ const char* buf = "abc";
+ EXPECT_THAT(write(t_, buf, 3), SyscallSucceedsWithValue(3));
+
+ int opts = 0;
+ ASSERT_THAT(opts = fcntl(s_, F_GETFL), SyscallSucceeds());
+ ASSERT_THAT(fcntl(s_, F_SETFL, opts | O_NONBLOCK), SyscallSucceeds());
+ ASSERT_THAT(opts = fcntl(s_, F_GETFL), SyscallSucceeds());
+ ASSERT_NE(opts & O_NONBLOCK, 0);
+
+ EXPECT_THAT(shutdown(s_, SHUT_RD), SyscallSucceeds());
+
+ // We should get the data even though read has been shutdown.
+ EXPECT_THAT(recv(s_, received, 2, 0), SyscallSucceedsWithValue(2));
+
+ // Because we read less than the entire packet length, since it's a packet
+ // based socket any subsequent reads should return EWOULDBLOCK.
+ EXPECT_THAT(recv(s_, received, 1, 0), SyscallFailsWithErrno(EWOULDBLOCK));
+}
+
+// This test is validating that even after a socket is shutdown if it's
+// reconnected it will reset the shutdown state.
+TEST_P(UdpSocketTest, ReadShutdownSameSocketResetsShutdownState) {
+ char received[512];
+ EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT),
+ SyscallFailsWithErrno(EWOULDBLOCK));
+
+ EXPECT_THAT(shutdown(s_, SHUT_RD), SyscallFailsWithErrno(ENOTCONN));
+
+ EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT),
+ SyscallFailsWithErrno(EWOULDBLOCK));
+
+ // Connect the socket, then try to shutdown again.
+ ASSERT_THAT(bind(s_, addr_[1], addrlen_), SyscallSucceeds());
+ ASSERT_THAT(connect(s_, addr_[2], addrlen_), SyscallSucceeds());
+
+ EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT),
+ SyscallFailsWithErrno(EWOULDBLOCK));
+}
+
+TEST_P(UdpSocketTest, ReadShutdown) {
+ char received[512];
+ EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT),
+ SyscallFailsWithErrno(EWOULDBLOCK));
+
+ EXPECT_THAT(shutdown(s_, SHUT_RD), SyscallFailsWithErrno(ENOTCONN));
+
+ EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT),
+ SyscallFailsWithErrno(EWOULDBLOCK));
+
+ // Connect the socket, then try to shutdown again.
+ ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds());
+
+ EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT),
+ SyscallFailsWithErrno(EWOULDBLOCK));
+
+ EXPECT_THAT(shutdown(s_, SHUT_RD), SyscallSucceeds());
+
+ EXPECT_THAT(recv(s_, received, sizeof(received), 0),
+ SyscallSucceedsWithValue(0));
+}
+
+TEST_P(UdpSocketTest, ReadShutdownDifferentThread) {
+ char received[512];
+ EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT),
+ SyscallFailsWithErrno(EWOULDBLOCK));
+
+ // Connect the socket, then shutdown from another thread.
+ ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds());
+
+ EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT),
+ SyscallFailsWithErrno(EWOULDBLOCK));
+
+ ScopedThread t([&] {
+ absl::SleepFor(absl::Milliseconds(200));
+ EXPECT_THAT(shutdown(this->s_, SHUT_RD), SyscallSucceeds());
+ });
+ EXPECT_THAT(RetryEINTR(recv)(s_, received, sizeof(received), 0),
+ SyscallSucceedsWithValue(0));
+ t.Join();
+
+ EXPECT_THAT(RetryEINTR(recv)(s_, received, sizeof(received), 0),
+ SyscallSucceedsWithValue(0));
+}
+
+TEST_P(UdpSocketTest, WriteShutdown) {
+ EXPECT_THAT(shutdown(s_, SHUT_WR), SyscallFailsWithErrno(ENOTCONN));
+ ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds());
+ EXPECT_THAT(shutdown(s_, SHUT_WR), SyscallSucceeds());
+}
+
+TEST_P(UdpSocketTest, SynchronousReceive) {
+ // Bind s_ to loopback.
+ ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds());
+
+ // Send some data to s_ from another thread.
+ char buf[512];
+ RandomizeBuffer(buf, sizeof(buf));
+
+ // Receive the data prior to actually starting the other thread.
+ char received[512];
+ EXPECT_THAT(RetryEINTR(recv)(s_, received, sizeof(received), MSG_DONTWAIT),
+ SyscallFailsWithErrno(EWOULDBLOCK));
+
+ // Start the thread.
+ ScopedThread t([&] {
+ absl::SleepFor(absl::Milliseconds(200));
+ ASSERT_THAT(
+ sendto(this->t_, buf, sizeof(buf), 0, this->addr_[0], this->addrlen_),
+ SyscallSucceedsWithValue(sizeof(buf)));
+ });
+
+ EXPECT_THAT(RetryEINTR(recv)(s_, received, sizeof(received), 0),
+ SyscallSucceedsWithValue(512));
+ EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0);
+}
+
+TEST_P(UdpSocketTest, BoundaryPreserved_SendRecv) {
+ // Bind s_ to loopback:TestPort.
+ ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds());
+
+ // Send 3 packets from t_ to s_.
+ constexpr int psize = 100;
+ char buf[3 * psize];
+ RandomizeBuffer(buf, sizeof(buf));
+
+ for (int i = 0; i < 3; ++i) {
+ ASSERT_THAT(sendto(t_, buf + i * psize, psize, 0, addr_[0], addrlen_),
+ SyscallSucceedsWithValue(psize));
+ }
+
+ // Receive the data as 3 separate packets.
+ char received[6 * psize];
+ for (int i = 0; i < 3; ++i) {
+ EXPECT_THAT(recv(s_, received + i * psize, 3 * psize, 0),
+ SyscallSucceedsWithValue(psize));
+ }
+ EXPECT_EQ(memcmp(buf, received, 3 * psize), 0);
+}
+
+TEST_P(UdpSocketTest, BoundaryPreserved_WritevReadv) {
+ // Bind s_ to loopback:TestPort.
+ ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds());
+
+ // Direct writes from t_ to s_.
+ ASSERT_THAT(connect(t_, addr_[0], addrlen_), SyscallSucceeds());
+
+ // Send 2 packets from t_ to s_, where each packet's data consists of 2
+ // discontiguous iovecs.
+ constexpr size_t kPieceSize = 100;
+ char buf[4 * kPieceSize];
+ RandomizeBuffer(buf, sizeof(buf));
+
+ for (int i = 0; i < 2; i++) {
+ struct iovec iov[2];
+ for (int j = 0; j < 2; j++) {
+ iov[j].iov_base = reinterpret_cast<void*>(
+ reinterpret_cast<uintptr_t>(buf) + (i + 2 * j) * kPieceSize);
+ iov[j].iov_len = kPieceSize;
+ }
+ ASSERT_THAT(writev(t_, iov, 2), SyscallSucceedsWithValue(2 * kPieceSize));
+ }
+
+ // Receive the data as 2 separate packets.
+ char received[6 * kPieceSize];
+ for (int i = 0; i < 2; i++) {
+ struct iovec iov[3];
+ for (int j = 0; j < 3; j++) {
+ iov[j].iov_base = reinterpret_cast<void*>(
+ reinterpret_cast<uintptr_t>(received) + (i + 2 * j) * kPieceSize);
+ iov[j].iov_len = kPieceSize;
+ }
+ ASSERT_THAT(readv(s_, iov, 3), SyscallSucceedsWithValue(2 * kPieceSize));
+ }
+ EXPECT_EQ(memcmp(buf, received, 4 * kPieceSize), 0);
+}
+
+TEST_P(UdpSocketTest, BoundaryPreserved_SendMsgRecvMsg) {
+ // Bind s_ to loopback:TestPort.
+ ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds());
+
+ // Send 2 packets from t_ to s_, where each packet's data consists of 2
+ // discontiguous iovecs.
+ constexpr size_t kPieceSize = 100;
+ char buf[4 * kPieceSize];
+ RandomizeBuffer(buf, sizeof(buf));
+
+ for (int i = 0; i < 2; i++) {
+ struct iovec iov[2];
+ for (int j = 0; j < 2; j++) {
+ iov[j].iov_base = reinterpret_cast<void*>(
+ reinterpret_cast<uintptr_t>(buf) + (i + 2 * j) * kPieceSize);
+ iov[j].iov_len = kPieceSize;
+ }
+ struct msghdr msg = {};
+ msg.msg_name = addr_[0];
+ msg.msg_namelen = addrlen_;
+ msg.msg_iov = iov;
+ msg.msg_iovlen = 2;
+ ASSERT_THAT(sendmsg(t_, &msg, 0), SyscallSucceedsWithValue(2 * kPieceSize));
+ }
+
+ // Receive the data as 2 separate packets.
+ char received[6 * kPieceSize];
+ for (int i = 0; i < 2; i++) {
+ struct iovec iov[3];
+ for (int j = 0; j < 3; j++) {
+ iov[j].iov_base = reinterpret_cast<void*>(
+ reinterpret_cast<uintptr_t>(received) + (i + 2 * j) * kPieceSize);
+ iov[j].iov_len = kPieceSize;
+ }
+ struct msghdr msg = {};
+ msg.msg_iov = iov;
+ msg.msg_iovlen = 3;
+ ASSERT_THAT(recvmsg(s_, &msg, 0), SyscallSucceedsWithValue(2 * kPieceSize));
+ }
+ EXPECT_EQ(memcmp(buf, received, 4 * kPieceSize), 0);
+}
+
+TEST_P(UdpSocketTest, FIONREADShutdown) {
+ int n = -1;
+ EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0));
+ EXPECT_EQ(n, 0);
+
+ // A UDP socket must be connected before it can be shutdown.
+ ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds());
+
+ n = -1;
+ EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0));
+ EXPECT_EQ(n, 0);
+
+ EXPECT_THAT(shutdown(s_, SHUT_RD), SyscallSucceeds());
+
+ n = -1;
+ EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0));
+ EXPECT_EQ(n, 0);
+}
+
+TEST_P(UdpSocketTest, FIONREADWriteShutdown) {
+ int n = -1;
+ EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0));
+ EXPECT_EQ(n, 0);
+
+ // Bind s_ to loopback:TestPort.
+ ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds());
+
+ // A UDP socket must be connected before it can be shutdown.
+ ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds());
+
+ n = -1;
+ EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0));
+ EXPECT_EQ(n, 0);
+
+ const char str[] = "abc";
+ ASSERT_THAT(send(s_, str, sizeof(str), 0),
+ SyscallSucceedsWithValue(sizeof(str)));
+
+ n = -1;
+ EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0));
+ EXPECT_EQ(n, sizeof(str));
+
+ EXPECT_THAT(shutdown(s_, SHUT_RD), SyscallSucceeds());
+
+ n = -1;
+ EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0));
+ EXPECT_EQ(n, sizeof(str));
+}
+
+TEST_P(UdpSocketTest, FIONREAD) {
+ // Bind s_ to loopback:TestPort.
+ ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds());
+
+ // Check that the bound socket with an empty buffer reports an empty first
+ // packet.
+ int n = -1;
+ EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0));
+ EXPECT_EQ(n, 0);
+
+ // Send 3 packets from t_ to s_.
+ constexpr int psize = 100;
+ char buf[3 * psize];
+ RandomizeBuffer(buf, sizeof(buf));
+
+ for (int i = 0; i < 3; ++i) {
+ ASSERT_THAT(sendto(t_, buf + i * psize, psize, 0, addr_[0], addrlen_),
+ SyscallSucceedsWithValue(psize));
+
+ // Check that regardless of how many packets are in the queue, the size
+ // reported is that of a single packet.
+ n = -1;
+ EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0));
+ EXPECT_EQ(n, psize);
+ }
+}
+
+TEST_P(UdpSocketTest, FIONREADZeroLengthPacket) {
+ // Bind s_ to loopback:TestPort.
+ ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds());
+
+ // Check that the bound socket with an empty buffer reports an empty first
+ // packet.
+ int n = -1;
+ EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0));
+ EXPECT_EQ(n, 0);
+
+ // Send 3 packets from t_ to s_.
+ constexpr int psize = 100;
+ char buf[3 * psize];
+ RandomizeBuffer(buf, sizeof(buf));
+
+ for (int i = 0; i < 3; ++i) {
+ ASSERT_THAT(sendto(t_, buf + i * psize, 0, 0, addr_[0], addrlen_),
+ SyscallSucceedsWithValue(0));
+
+ // Check that regardless of how many packets are in the queue, the size
+ // reported is that of a single packet.
+ n = -1;
+ EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0));
+ EXPECT_EQ(n, 0);
+ }
+}
+
+TEST_P(UdpSocketTest, FIONREADZeroLengthWriteShutdown) {
+ int n = -1;
+ EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0));
+ EXPECT_EQ(n, 0);
+
+ // Bind s_ to loopback:TestPort.
+ ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds());
+
+ // A UDP socket must be connected before it can be shutdown.
+ ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds());
+
+ n = -1;
+ EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0));
+ EXPECT_EQ(n, 0);
+
+ const char str[] = "abc";
+ ASSERT_THAT(send(s_, str, 0, 0), SyscallSucceedsWithValue(0));
+
+ n = -1;
+ EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0));
+ EXPECT_EQ(n, 0);
+
+ EXPECT_THAT(shutdown(s_, SHUT_RD), SyscallSucceeds());
+
+ n = -1;
+ EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0));
+ EXPECT_EQ(n, 0);
+}
+
+TEST_P(UdpSocketTest, ErrorQueue) {
+ char cmsgbuf[CMSG_SPACE(sizeof(sock_extended_err))];
+ msghdr msg;
+ memset(&msg, 0, sizeof(msg));
+ iovec iov;
+ memset(&iov, 0, sizeof(iov));
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cmsgbuf;
+ msg.msg_controllen = sizeof(cmsgbuf);
+
+ // recv*(MSG_ERRQUEUE) never blocks, even without MSG_DONTWAIT.
+ EXPECT_THAT(RetryEINTR(recvmsg)(s_, &msg, MSG_ERRQUEUE),
+ SyscallFailsWithErrno(EAGAIN));
+}
+
+TEST_P(UdpSocketTest, SoTimestamp) {
+ ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds());
+ ASSERT_THAT(connect(t_, addr_[0], addrlen_), SyscallSucceeds());
+
+ int v = 1;
+ EXPECT_THAT(setsockopt(s_, SOL_SOCKET, SO_TIMESTAMP, &v, sizeof(v)),
+ SyscallSucceeds());
+
+ char buf[3];
+ // Send zero length packet from t_ to s_.
+ ASSERT_THAT(RetryEINTR(write)(t_, buf, 0), SyscallSucceedsWithValue(0));
+
+ char cmsgbuf[CMSG_SPACE(sizeof(struct timeval))];
+ msghdr msg;
+ memset(&msg, 0, sizeof(msg));
+ iovec iov;
+ memset(&iov, 0, sizeof(iov));
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cmsgbuf;
+ msg.msg_controllen = sizeof(cmsgbuf);
+
+ ASSERT_THAT(RetryEINTR(recvmsg)(s_, &msg, 0), SyscallSucceedsWithValue(0));
+
+ struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+ ASSERT_NE(cmsg, nullptr);
+ ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET);
+ ASSERT_EQ(cmsg->cmsg_type, SO_TIMESTAMP);
+ ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(struct timeval)));
+
+ struct timeval tv = {};
+ memcpy(&tv, CMSG_DATA(cmsg), sizeof(struct timeval));
+
+ ASSERT_TRUE(tv.tv_sec != 0 || tv.tv_usec != 0);
+}
+
+TEST_P(UdpSocketTest, WriteShutdownNotConnected) {
+ EXPECT_THAT(shutdown(s_, SHUT_WR), SyscallFailsWithErrno(ENOTCONN));
+}
+
+INSTANTIATE_TEST_CASE_P(AllInetTests, UdpSocketTest,
+ ::testing::Values(AF_INET, AF_INET6));
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/uidgid.cc b/test/syscalls/linux/uidgid.cc
new file mode 100644
index 000000000..c0c1f2960
--- /dev/null
+++ b/test/syscalls/linux/uidgid.cc
@@ -0,0 +1,277 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <grp.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_join.h"
+#include "test/util/capability_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+DEFINE_int32(scratch_uid1, 65534, "first scratch UID");
+DEFINE_int32(scratch_uid2, 65533, "second scratch UID");
+DEFINE_int32(scratch_gid1, 65534, "first scratch GID");
+DEFINE_int32(scratch_gid2, 65533, "second scratch GID");
+
+using ::testing::UnorderedElementsAreArray;
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(UidGidTest, Getuid) {
+ uid_t ruid, euid, suid;
+ EXPECT_THAT(getresuid(&ruid, &euid, &suid), SyscallSucceeds());
+ EXPECT_THAT(getuid(), SyscallSucceedsWithValue(ruid));
+ EXPECT_THAT(geteuid(), SyscallSucceedsWithValue(euid));
+}
+
+TEST(UidGidTest, Getgid) {
+ gid_t rgid, egid, sgid;
+ EXPECT_THAT(getresgid(&rgid, &egid, &sgid), SyscallSucceeds());
+ EXPECT_THAT(getgid(), SyscallSucceedsWithValue(rgid));
+ EXPECT_THAT(getegid(), SyscallSucceedsWithValue(egid));
+}
+
+TEST(UidGidTest, Getgroups) {
+ // "If size is zero, list is not modified, but the total number of
+ // supplementary group IDs for the process is returned." - getgroups(2)
+ int nr_groups;
+ ASSERT_THAT(nr_groups = getgroups(0, nullptr), SyscallSucceeds());
+ std::vector<gid_t> list(nr_groups);
+ EXPECT_THAT(getgroups(list.size(), list.data()), SyscallSucceeds());
+
+ // "EINVAL: size is less than the number of supplementary group IDs, but is
+ // not zero."
+ EXPECT_THAT(getgroups(-1, nullptr), SyscallFailsWithErrno(EINVAL));
+
+ // Testing for EFAULT requires actually having groups, which isn't guaranteed
+ // here; see the setgroups test below.
+}
+
+// If the caller's real/effective/saved user/group IDs are all 0, IsRoot returns
+// true. Otherwise IsRoot logs an explanatory message and returns false.
+PosixErrorOr<bool> IsRoot() {
+ uid_t ruid, euid, suid;
+ int rc = getresuid(&ruid, &euid, &suid);
+ MaybeSave();
+ if (rc < 0) {
+ return PosixError(errno, "getresuid");
+ }
+ if (ruid != 0 || euid != 0 || suid != 0) {
+ return false;
+ }
+ gid_t rgid, egid, sgid;
+ rc = getresgid(&rgid, &egid, &sgid);
+ MaybeSave();
+ if (rc < 0) {
+ return PosixError(errno, "getresgid");
+ }
+ if (rgid != 0 || egid != 0 || sgid != 0) {
+ return false;
+ }
+ return true;
+}
+
+// Checks that the calling process' real/effective/saved user IDs are
+// ruid/euid/suid respectively.
+PosixError CheckUIDs(uid_t ruid, uid_t euid, uid_t suid) {
+ uid_t actual_ruid, actual_euid, actual_suid;
+ int rc = getresuid(&actual_ruid, &actual_euid, &actual_suid);
+ MaybeSave();
+ if (rc < 0) {
+ return PosixError(errno, "getresuid");
+ }
+ if (ruid != actual_ruid || euid != actual_euid || suid != actual_suid) {
+ return PosixError(
+ EPERM, absl::StrCat(
+ "incorrect user IDs: got (",
+ absl::StrJoin({actual_ruid, actual_euid, actual_suid}, ", "),
+ ", wanted (", absl::StrJoin({ruid, euid, suid}, ", "), ")"));
+ }
+ return NoError();
+}
+
+PosixError CheckGIDs(gid_t rgid, gid_t egid, gid_t sgid) {
+ gid_t actual_rgid, actual_egid, actual_sgid;
+ int rc = getresgid(&actual_rgid, &actual_egid, &actual_sgid);
+ MaybeSave();
+ if (rc < 0) {
+ return PosixError(errno, "getresgid");
+ }
+ if (rgid != actual_rgid || egid != actual_egid || sgid != actual_sgid) {
+ return PosixError(
+ EPERM, absl::StrCat(
+ "incorrect group IDs: got (",
+ absl::StrJoin({actual_rgid, actual_egid, actual_sgid}, ", "),
+ ", wanted (", absl::StrJoin({rgid, egid, sgid}, ", "), ")"));
+ }
+ return NoError();
+}
+
+// N.B. These tests may break horribly unless run via a gVisor test runner,
+// because changing UID in one test may forfeit permissions required by other
+// tests. (The test runner runs each test in a separate process.)
+
+TEST(UidGidRootTest, Setuid) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsRoot()));
+
+ // Do setuid in a separate thread so that after finishing this test, the
+ // process can still open files the test harness created before starting this
+ // test. Otherwise, the files are created by root (UID before the test), but
+ // cannot be opened by the `uid` set below after the test. After calling
+ // setuid(non-zero-UID), there is no way to get root privileges back.
+ ScopedThread([&] {
+ // Use syscall instead of glibc setuid wrapper because we want this setuid
+ // call to only apply to this task. POSIX threads, however, require that all
+ // threads have the same UIDs, so using the setuid wrapper sets all threads'
+ // real UID.
+ EXPECT_THAT(syscall(SYS_setuid, -1), SyscallFailsWithErrno(EINVAL));
+
+ const uid_t uid = FLAGS_scratch_uid1;
+ EXPECT_THAT(syscall(SYS_setuid, uid), SyscallSucceeds());
+ // "If the effective UID of the caller is root (more precisely: if the
+ // caller has the CAP_SETUID capability), the real UID and saved set-user-ID
+ // are also set." - setuid(2)
+ EXPECT_NO_ERRNO(CheckUIDs(uid, uid, uid));
+ });
+}
+
+TEST(UidGidRootTest, Setgid) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsRoot()));
+
+ EXPECT_THAT(setgid(-1), SyscallFailsWithErrno(EINVAL));
+
+ const gid_t gid = FLAGS_scratch_gid1;
+ ASSERT_THAT(setgid(gid), SyscallSucceeds());
+ EXPECT_NO_ERRNO(CheckGIDs(gid, gid, gid));
+}
+
+TEST(UidGidRootTest, SetgidNotFromThreadGroupLeader) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsRoot()));
+
+ const gid_t gid = FLAGS_scratch_gid1;
+ // NOTE: Do setgid in a separate thread so that we can test if
+ // info.si_pid is set correctly.
+ ScopedThread([gid] { ASSERT_THAT(setgid(gid), SyscallSucceeds()); });
+ EXPECT_NO_ERRNO(CheckGIDs(gid, gid, gid));
+}
+
+TEST(UidGidRootTest, Setreuid) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsRoot()));
+
+ // "Supplying a value of -1 for either the real or effective user ID forces
+ // the system to leave that ID unchanged." - setreuid(2)
+ EXPECT_THAT(setreuid(-1, -1), SyscallSucceeds());
+ EXPECT_NO_ERRNO(CheckUIDs(0, 0, 0));
+
+ // Do setuid in a separate thread so that after finishing this test, the
+ // process can still open files the test harness created before starting this
+ // test. Otherwise, the files are created by root (UID before the test), but
+ // cannot be opened by the `uid` set below after the test. After calling
+ // setuid(non-zero-UID), there is no way to get root privileges back.
+ ScopedThread([&] {
+ const uid_t ruid = FLAGS_scratch_uid1;
+ const uid_t euid = FLAGS_scratch_uid2;
+
+ // Use syscall instead of glibc setuid wrapper because we want this setuid
+ // call to only apply to this task. posix threads, however, require that all
+ // threads have the same UIDs, so using the setuid wrapper sets all threads'
+ // real UID.
+ EXPECT_THAT(syscall(SYS_setreuid, ruid, euid), SyscallSucceeds());
+
+ // "If the real user ID is set or the effective user ID is set to a value
+ // not equal to the previous real user ID, the saved set-user-ID will be set
+ // to the new effective user ID." - setreuid(2)
+ EXPECT_NO_ERRNO(CheckUIDs(ruid, euid, euid));
+ });
+}
+
+TEST(UidGidRootTest, Setregid) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsRoot()));
+
+ EXPECT_THAT(setregid(-1, -1), SyscallSucceeds());
+ EXPECT_NO_ERRNO(CheckGIDs(0, 0, 0));
+
+ const gid_t rgid = FLAGS_scratch_gid1;
+ const gid_t egid = FLAGS_scratch_gid2;
+ ASSERT_THAT(setregid(rgid, egid), SyscallSucceeds());
+ EXPECT_NO_ERRNO(CheckGIDs(rgid, egid, egid));
+}
+
+TEST(UidGidRootTest, Setresuid) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsRoot()));
+
+ // "If one of the arguments equals -1, the corresponding value is not
+ // changed." - setresuid(2)
+ EXPECT_THAT(setresuid(-1, -1, -1), SyscallSucceeds());
+ EXPECT_NO_ERRNO(CheckUIDs(0, 0, 0));
+
+ // Do setuid in a separate thread so that after finishing this test, the
+ // process can still open files the test harness created before starting this
+ // test. Otherwise, the files are created by root (UID before the test), but
+ // cannot be opened by the `uid` set below after the test. After calling
+ // setuid(non-zero-UID), there is no way to get root privileges back.
+ ScopedThread([&] {
+ const uid_t ruid = 12345;
+ const uid_t euid = 23456;
+ const uid_t suid = 34567;
+
+ // Use syscall instead of glibc setuid wrapper because we want this setuid
+ // call to only apply to this task. posix threads, however, require that all
+ // threads have the same UIDs, so using the setuid wrapper sets all threads'
+ // real UID.
+ EXPECT_THAT(syscall(SYS_setresuid, ruid, euid, suid), SyscallSucceeds());
+ EXPECT_NO_ERRNO(CheckUIDs(ruid, euid, suid));
+ });
+}
+
+TEST(UidGidRootTest, Setresgid) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsRoot()));
+
+ EXPECT_THAT(setresgid(-1, -1, -1), SyscallSucceeds());
+ EXPECT_NO_ERRNO(CheckGIDs(0, 0, 0));
+
+ const gid_t rgid = 12345;
+ const gid_t egid = 23456;
+ const gid_t sgid = 34567;
+ ASSERT_THAT(setresgid(rgid, egid, sgid), SyscallSucceeds());
+ EXPECT_NO_ERRNO(CheckGIDs(rgid, egid, sgid));
+}
+
+TEST(UidGidRootTest, Setgroups) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsRoot()));
+
+ std::vector<gid_t> list = {123, 500};
+ ASSERT_THAT(setgroups(list.size(), list.data()), SyscallSucceeds());
+ std::vector<gid_t> list2(list.size());
+ ASSERT_THAT(getgroups(list2.size(), list2.data()), SyscallSucceeds());
+ EXPECT_THAT(list, UnorderedElementsAreArray(list2));
+
+ // "EFAULT: list has an invalid address."
+ EXPECT_THAT(getgroups(100, reinterpret_cast<gid_t*>(-1)),
+ SyscallFailsWithErrno(EFAULT));
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/uname.cc b/test/syscalls/linux/uname.cc
new file mode 100644
index 000000000..d22a34bd7
--- /dev/null
+++ b/test/syscalls/linux/uname.cc
@@ -0,0 +1,99 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sched.h>
+#include <sys/utsname.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "absl/strings/string_view.h"
+#include "test/util/capability_util.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(UnameTest, Sanity) {
+ struct utsname buf;
+ ASSERT_THAT(uname(&buf), SyscallSucceeds());
+ EXPECT_NE(strlen(buf.release), 0);
+ EXPECT_NE(strlen(buf.version), 0);
+ EXPECT_NE(strlen(buf.machine), 0);
+ EXPECT_NE(strlen(buf.sysname), 0);
+ EXPECT_NE(strlen(buf.nodename), 0);
+ EXPECT_NE(strlen(buf.domainname), 0);
+}
+
+TEST(UnameTest, SetNames) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+
+ constexpr char kHostname[] = "wubbalubba";
+ ASSERT_THAT(sethostname(kHostname, sizeof(kHostname)), SyscallSucceeds());
+
+ constexpr char kDomainname[] = "dubdub.com";
+ ASSERT_THAT(setdomainname(kDomainname, sizeof(kDomainname)),
+ SyscallSucceeds());
+
+ struct utsname buf;
+ EXPECT_THAT(uname(&buf), SyscallSucceeds());
+ EXPECT_EQ(absl::string_view(buf.nodename), kHostname);
+ EXPECT_EQ(absl::string_view(buf.domainname), kDomainname);
+
+ // These should just be glibc wrappers that also call uname(2).
+ char hostname[65];
+ EXPECT_THAT(gethostname(hostname, sizeof(hostname)), SyscallSucceeds());
+ EXPECT_EQ(absl::string_view(hostname), kHostname);
+
+ char domainname[65];
+ EXPECT_THAT(getdomainname(domainname, sizeof(domainname)), SyscallSucceeds());
+ EXPECT_EQ(absl::string_view(domainname), kDomainname);
+}
+
+TEST(UnameTest, UnprivilegedSetNames) {
+ if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))) {
+ EXPECT_NO_ERRNO(SetCapability(CAP_SYS_ADMIN, false));
+ }
+
+ EXPECT_THAT(sethostname("", 0), SyscallFailsWithErrno(EPERM));
+ EXPECT_THAT(setdomainname("", 0), SyscallFailsWithErrno(EPERM));
+}
+
+TEST(UnameTest, UnshareUTS) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+
+ struct utsname init;
+ ASSERT_THAT(uname(&init), SyscallSucceeds());
+
+ ScopedThread([&]() {
+ EXPECT_THAT(unshare(CLONE_NEWUTS), SyscallSucceeds());
+
+ constexpr char kHostname[] = "wubbalubba";
+ EXPECT_THAT(sethostname(kHostname, sizeof(kHostname)), SyscallSucceeds());
+
+ char hostname[65];
+ EXPECT_THAT(gethostname(hostname, sizeof(hostname)), SyscallSucceeds());
+ });
+
+ struct utsname after;
+ EXPECT_THAT(uname(&after), SyscallSucceeds());
+ EXPECT_EQ(absl::string_view(after.nodename), init.nodename);
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/unix_domain_socket_test_util.cc b/test/syscalls/linux/unix_domain_socket_test_util.cc
new file mode 100644
index 000000000..2d7a530b9
--- /dev/null
+++ b/test/syscalls/linux/unix_domain_socket_test_util.cc
@@ -0,0 +1,346 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/syscalls/linux/unix_domain_socket_test_util.h"
+
+#include <sys/un.h>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "absl/strings/str_cat.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+std::string DescribeUnixDomainSocketType(int type) {
+ const char* type_str = nullptr;
+ switch (type & ~(SOCK_NONBLOCK | SOCK_CLOEXEC)) {
+ case SOCK_STREAM:
+ type_str = "SOCK_STREAM";
+ break;
+ case SOCK_DGRAM:
+ type_str = "SOCK_DGRAM";
+ break;
+ case SOCK_SEQPACKET:
+ type_str = "SOCK_SEQPACKET";
+ break;
+ }
+ if (!type_str) {
+ return absl::StrCat("Unix domain socket with unknown type ", type);
+ } else {
+ return absl::StrCat(((type & SOCK_NONBLOCK) != 0) ? "non-blocking " : "",
+ ((type & SOCK_CLOEXEC) != 0) ? "close-on-exec " : "",
+ type_str, " Unix domain socket");
+ }
+}
+
+SocketPairKind UnixDomainSocketPair(int type) {
+ return SocketPairKind{DescribeUnixDomainSocketType(type),
+ SyscallSocketPairCreator(AF_UNIX, type, 0)};
+}
+
+SocketPairKind FilesystemBoundUnixDomainSocketPair(int type) {
+ std::string description = absl::StrCat(DescribeUnixDomainSocketType(type),
+ " created with filesystem binding");
+ if ((type & SOCK_DGRAM) == SOCK_DGRAM) {
+ return SocketPairKind{
+ description,
+ FilesystemBidirectionalBindSocketPairCreator(AF_UNIX, type, 0)};
+ }
+ return SocketPairKind{
+ description, FilesystemAcceptBindSocketPairCreator(AF_UNIX, type, 0)};
+}
+
+SocketPairKind AbstractBoundUnixDomainSocketPair(int type) {
+ std::string description = absl::StrCat(DescribeUnixDomainSocketType(type),
+ " created with abstract namespace binding");
+ if ((type & SOCK_DGRAM) == SOCK_DGRAM) {
+ return SocketPairKind{
+ description,
+ AbstractBidirectionalBindSocketPairCreator(AF_UNIX, type, 0)};
+ }
+ return SocketPairKind{description,
+ AbstractAcceptBindSocketPairCreator(AF_UNIX, type, 0)};
+}
+
+SocketPairKind SocketpairGoferUnixDomainSocketPair(int type) {
+ std::string description = absl::StrCat(DescribeUnixDomainSocketType(type),
+ " created with the socketpair gofer");
+ return SocketPairKind{description,
+ SocketpairGoferSocketPairCreator(AF_UNIX, type, 0)};
+}
+
+SocketPairKind SocketpairGoferFileSocketPair(int type) {
+ std::string description =
+ absl::StrCat(((type & O_NONBLOCK) != 0) ? "non-blocking " : "",
+ ((type & O_CLOEXEC) != 0) ? "close-on-exec " : "",
+ "file socket created with the socketpair gofer");
+ return SocketPairKind{description,
+ SocketpairGoferFileSocketPairCreator(type)};
+}
+
+SocketPairKind FilesystemUnboundUnixDomainSocketPair(int type) {
+ return SocketPairKind{absl::StrCat(DescribeUnixDomainSocketType(type),
+ " unbound with a filesystem address"),
+ FilesystemUnboundSocketPairCreator(AF_UNIX, type, 0)};
+}
+
+SocketPairKind AbstractUnboundUnixDomainSocketPair(int type) {
+ return SocketPairKind{
+ absl::StrCat(DescribeUnixDomainSocketType(type),
+ " unbound with an abstract namespace address"),
+ AbstractUnboundSocketPairCreator(AF_UNIX, type, 0)};
+}
+
+void SendSingleFD(int sock, int fd, char buf[], int buf_size) {
+ ASSERT_NO_FATAL_FAILURE(SendFDs(sock, &fd, 1, buf, buf_size));
+}
+
+void SendFDs(int sock, int fds[], int fds_size, char buf[], int buf_size) {
+ struct msghdr msg = {};
+ std::vector<char> control(CMSG_SPACE(fds_size * sizeof(int)));
+ msg.msg_control = &control[0];
+ msg.msg_controllen = control.size();
+
+ struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_len = CMSG_LEN(fds_size * sizeof(int));
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ for (int i = 0; i < fds_size; i++) {
+ memcpy(CMSG_DATA(cmsg) + i * sizeof(int), &fds[i], sizeof(int));
+ }
+
+ ASSERT_THAT(SendMsg(sock, &msg, buf, buf_size),
+ IsPosixErrorOkAndHolds(buf_size));
+}
+
+void RecvSingleFD(int sock, int* fd, char buf[], int buf_size) {
+ ASSERT_NO_FATAL_FAILURE(RecvFDs(sock, fd, 1, buf, buf_size, buf_size));
+}
+
+void RecvSingleFD(int sock, int* fd, char buf[], int buf_size,
+ int expected_size) {
+ ASSERT_NO_FATAL_FAILURE(RecvFDs(sock, fd, 1, buf, buf_size, expected_size));
+}
+
+void RecvFDs(int sock, int fds[], int fds_size, char buf[], int buf_size) {
+ ASSERT_NO_FATAL_FAILURE(
+ RecvFDs(sock, fds, fds_size, buf, buf_size, buf_size));
+}
+
+void RecvFDs(int sock, int fds[], int fds_size, char buf[], int buf_size,
+ int expected_size, bool peek) {
+ struct msghdr msg = {};
+ std::vector<char> control(CMSG_SPACE(fds_size * sizeof(int)));
+ msg.msg_control = &control[0];
+ msg.msg_controllen = control.size();
+
+ struct iovec iov;
+ iov.iov_base = buf;
+ iov.iov_len = buf_size;
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ int flags = 0;
+ if (peek) {
+ flags |= MSG_PEEK;
+ }
+
+ ASSERT_THAT(RetryEINTR(recvmsg)(sock, &msg, flags),
+ SyscallSucceedsWithValue(expected_size));
+ struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+ ASSERT_NE(cmsg, nullptr);
+ ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(fds_size * sizeof(int)));
+ ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET);
+ ASSERT_EQ(cmsg->cmsg_type, SCM_RIGHTS);
+
+ for (int i = 0; i < fds_size; i++) {
+ memcpy(&fds[i], CMSG_DATA(cmsg) + i * sizeof(int), sizeof(int));
+ }
+}
+
+void RecvFDs(int sock, int fds[], int fds_size, char buf[], int buf_size,
+ int expected_size) {
+ ASSERT_NO_FATAL_FAILURE(
+ RecvFDs(sock, fds, fds_size, buf, buf_size, expected_size, false));
+}
+
+void PeekSingleFD(int sock, int* fd, char buf[], int buf_size) {
+ ASSERT_NO_FATAL_FAILURE(RecvFDs(sock, fd, 1, buf, buf_size, buf_size, true));
+}
+
+void RecvNoCmsg(int sock, char buf[], int buf_size, int expected_size) {
+ struct msghdr msg = {};
+ char control[CMSG_SPACE(sizeof(int)) + CMSG_SPACE(sizeof(struct ucred))];
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+
+ struct iovec iov;
+ iov.iov_base = buf;
+ iov.iov_len = buf_size;
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ ASSERT_THAT(RetryEINTR(recvmsg)(sock, &msg, 0),
+ SyscallSucceedsWithValue(expected_size));
+ struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+ EXPECT_EQ(cmsg, nullptr);
+}
+
+void SendNullCmsg(int sock, char buf[], int buf_size) {
+ struct msghdr msg = {};
+ msg.msg_control = nullptr;
+ msg.msg_controllen = 0;
+
+ ASSERT_THAT(SendMsg(sock, &msg, buf, buf_size),
+ IsPosixErrorOkAndHolds(buf_size));
+}
+
+void SendCreds(int sock, ucred creds, char buf[], int buf_size) {
+ struct msghdr msg = {};
+
+ char control[CMSG_SPACE(sizeof(struct ucred))];
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+
+ struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_CREDENTIALS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
+ memcpy(CMSG_DATA(cmsg), &creds, sizeof(struct ucred));
+
+ ASSERT_THAT(SendMsg(sock, &msg, buf, buf_size),
+ IsPosixErrorOkAndHolds(buf_size));
+}
+
+void SendCredsAndFD(int sock, ucred creds, int fd, char buf[], int buf_size) {
+ struct msghdr msg = {};
+
+ char control[CMSG_SPACE(sizeof(struct ucred)) + CMSG_SPACE(sizeof(int))] = {};
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+
+ struct cmsghdr* cmsg1 = CMSG_FIRSTHDR(&msg);
+ cmsg1->cmsg_level = SOL_SOCKET;
+ cmsg1->cmsg_type = SCM_CREDENTIALS;
+ cmsg1->cmsg_len = CMSG_LEN(sizeof(struct ucred));
+ memcpy(CMSG_DATA(cmsg1), &creds, sizeof(struct ucred));
+
+ struct cmsghdr* cmsg2 = CMSG_NXTHDR(&msg, cmsg1);
+ cmsg2->cmsg_level = SOL_SOCKET;
+ cmsg2->cmsg_type = SCM_RIGHTS;
+ cmsg2->cmsg_len = CMSG_LEN(sizeof(int));
+ memcpy(CMSG_DATA(cmsg2), &fd, sizeof(int));
+
+ ASSERT_THAT(SendMsg(sock, &msg, buf, buf_size),
+ IsPosixErrorOkAndHolds(buf_size));
+}
+
+void RecvCreds(int sock, ucred* creds, char buf[], int buf_size) {
+ ASSERT_NO_FATAL_FAILURE(RecvCreds(sock, creds, buf, buf_size, buf_size));
+}
+
+void RecvCreds(int sock, ucred* creds, char buf[], int buf_size,
+ int expected_size) {
+ struct msghdr msg = {};
+ char control[CMSG_SPACE(sizeof(struct ucred))];
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+
+ struct iovec iov;
+ iov.iov_base = buf;
+ iov.iov_len = buf_size;
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ ASSERT_THAT(RetryEINTR(recvmsg)(sock, &msg, 0),
+ SyscallSucceedsWithValue(expected_size));
+ struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+ ASSERT_NE(cmsg, nullptr);
+ ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(struct ucred)));
+ ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET);
+ ASSERT_EQ(cmsg->cmsg_type, SCM_CREDENTIALS);
+
+ memcpy(creds, CMSG_DATA(cmsg), sizeof(struct ucred));
+}
+
+void RecvCredsAndFD(int sock, ucred* creds, int* fd, char buf[], int buf_size) {
+ struct msghdr msg = {};
+ char control[CMSG_SPACE(sizeof(struct ucred)) + CMSG_SPACE(sizeof(int))];
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+
+ struct iovec iov;
+ iov.iov_base = buf;
+ iov.iov_len = buf_size;
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ ASSERT_THAT(RetryEINTR(recvmsg)(sock, &msg, 0),
+ SyscallSucceedsWithValue(buf_size));
+
+ struct cmsghdr* cmsg1 = CMSG_FIRSTHDR(&msg);
+ ASSERT_NE(cmsg1, nullptr);
+ ASSERT_EQ(cmsg1->cmsg_len, CMSG_LEN(sizeof(struct ucred)));
+ ASSERT_EQ(cmsg1->cmsg_level, SOL_SOCKET);
+ ASSERT_EQ(cmsg1->cmsg_type, SCM_CREDENTIALS);
+ memcpy(creds, CMSG_DATA(cmsg1), sizeof(struct ucred));
+
+ struct cmsghdr* cmsg2 = CMSG_NXTHDR(&msg, cmsg1);
+ ASSERT_NE(cmsg2, nullptr);
+ ASSERT_EQ(cmsg2->cmsg_len, CMSG_LEN(sizeof(int)));
+ ASSERT_EQ(cmsg2->cmsg_level, SOL_SOCKET);
+ ASSERT_EQ(cmsg2->cmsg_type, SCM_RIGHTS);
+ memcpy(fd, CMSG_DATA(cmsg2), sizeof(int));
+}
+
+void RecvSingleFDUnaligned(int sock, int* fd, char buf[], int buf_size) {
+ struct msghdr msg = {};
+ char control[CMSG_SPACE(sizeof(int)) - sizeof(int)];
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+
+ struct iovec iov;
+ iov.iov_base = buf;
+ iov.iov_len = buf_size;
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ ASSERT_THAT(RetryEINTR(recvmsg)(sock, &msg, 0),
+ SyscallSucceedsWithValue(buf_size));
+
+ struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+ ASSERT_NE(cmsg, nullptr);
+ ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(int)));
+ ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET);
+ ASSERT_EQ(cmsg->cmsg_type, SCM_RIGHTS);
+
+ memcpy(fd, CMSG_DATA(cmsg), sizeof(int));
+}
+
+void SetSoPassCred(int sock) {
+ int one = 1;
+ EXPECT_THAT(setsockopt(sock, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one)),
+ SyscallSucceeds());
+}
+
+void UnsetSoPassCred(int sock) {
+ int zero = 0;
+ EXPECT_THAT(setsockopt(sock, SOL_SOCKET, SO_PASSCRED, &zero, sizeof(zero)),
+ SyscallSucceeds());
+}
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/unix_domain_socket_test_util.h b/test/syscalls/linux/unix_domain_socket_test_util.h
new file mode 100644
index 000000000..1b09aeae7
--- /dev/null
+++ b/test/syscalls/linux/unix_domain_socket_test_util.h
@@ -0,0 +1,161 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_SYSCALLS_UNIX_DOMAIN_SOCKET_TEST_UTIL_H_
+#define GVISOR_TEST_SYSCALLS_UNIX_DOMAIN_SOCKET_TEST_UTIL_H_
+
+#include <string>
+#include "test/syscalls/linux/socket_test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+// DescribeUnixDomainSocketType returns a human-readable std::string explaining the
+// given Unix domain socket type.
+std::string DescribeUnixDomainSocketType(int type);
+
+// UnixDomainSocketPair returns a SocketPairKind that represents SocketPairs
+// created by invoking the socketpair() syscall with AF_UNIX and the given type.
+SocketPairKind UnixDomainSocketPair(int type);
+
+// FilesystemBoundUnixDomainSocketPair returns a SocketPairKind that represents
+// SocketPairs created with bind() and accept() syscalls with a temp file path,
+// AF_UNIX and the given type.
+SocketPairKind FilesystemBoundUnixDomainSocketPair(int type);
+
+// AbstractBoundUnixDomainSocketPair returns a SocketPairKind that represents
+// SocketPairs created with bind() and accept() syscalls with a temp abstract
+// path, AF_UNIX and the given type.
+SocketPairKind AbstractBoundUnixDomainSocketPair(int type);
+
+// SocketpairGoferUnixDomainSocketPair returns a SocketPairKind that was created
+// with two sockets conected to the socketpair gofer.
+SocketPairKind SocketpairGoferUnixDomainSocketPair(int type);
+
+// SocketpairGoferFileSocketPair returns a SocketPairKind that was created with
+// two open() calls on paths backed by the socketpair gofer.
+SocketPairKind SocketpairGoferFileSocketPair(int type);
+
+// FilesystemUnboundUnixDomainSocketPair returns a SocketPairKind that
+// represents two unbound sockets and a filesystem path for binding.
+SocketPairKind FilesystemUnboundUnixDomainSocketPair(int type);
+
+// AbstractUnboundUnixDomainSocketPair returns a SocketPairKind that represents
+// two unbound sockets and an abstract namespace path for binding.
+SocketPairKind AbstractUnboundUnixDomainSocketPair(int type);
+
+// SendSingleFD sends both a single FD and some data over a unix domain socket
+// specified by an FD. Note that calls to this function must be wrapped in
+// ASSERT_NO_FATAL_FAILURE for internal assertions to halt the test.
+void SendSingleFD(int sock, int fd, char buf[], int buf_size);
+
+// SendFDs sends an arbitrary number of FDs and some data over a unix domain
+// socket specified by an FD. Note that calls to this function must be wrapped
+// in ASSERT_NO_FATAL_FAILURE for internal assertions to halt the test.
+void SendFDs(int sock, int fds[], int fds_size, char buf[], int buf_size);
+
+// RecvSingleFD receives both a single FD and some data over a unix domain
+// socket specified by an FD. Note that calls to this function must be wrapped
+// in ASSERT_NO_FATAL_FAILURE for internal assertions to halt the test.
+void RecvSingleFD(int sock, int* fd, char buf[], int buf_size);
+
+// RecvSingleFD receives both a single FD and some data over a unix domain
+// socket specified by an FD. This version allows the expected amount of data
+// received to be different than the buffer size. Note that calls to this
+// function must be wrapped in ASSERT_NO_FATAL_FAILURE for internal assertions
+// to halt the test.
+void RecvSingleFD(int sock, int* fd, char buf[], int buf_size,
+ int expected_size);
+
+// PeekSingleFD peeks at both a single FD and some data over a unix domain
+// socket specified by an FD. Note that calls to this function must be wrapped
+// in ASSERT_NO_FATAL_FAILURE for internal assertions to halt the test.
+void PeekSingleFD(int sock, int* fd, char buf[], int buf_size);
+
+// RecvFDs receives both an arbitrary number of FDs and some data over a unix
+// domain socket specified by an FD. Note that calls to this function must be
+// wrapped in ASSERT_NO_FATAL_FAILURE for internal assertions to halt the test.
+void RecvFDs(int sock, int fds[], int fds_size, char buf[], int buf_size);
+
+// RecvFDs receives both an arbitrary number of FDs and some data over a unix
+// domain socket specified by an FD. This version allows the expected amount of
+// data received to be different than the buffer size. Note that calls to this
+// function must be wrapped in ASSERT_NO_FATAL_FAILURE for internal assertions
+// to halt the test.
+void RecvFDs(int sock, int fds[], int fds_size, char buf[], int buf_size,
+ int expected_size);
+
+// RecvNoCmsg receives some data over a unix domain socket specified by an FD
+// and asserts that no control messages are available for receiving. Note that
+// calls to this function must be wrapped in ASSERT_NO_FATAL_FAILURE for
+// internal assertions to halt the test.
+void RecvNoCmsg(int sock, char buf[], int buf_size, int expected_size);
+
+inline void RecvNoCmsg(int sock, char buf[], int buf_size) {
+ RecvNoCmsg(sock, buf, buf_size, buf_size);
+}
+
+// SendCreds sends the credentials of the current process and some data over a
+// unix domain socket specified by an FD. Note that calls to this function must
+// be wrapped in ASSERT_NO_FATAL_FAILURE for internal assertions to halt the
+// test.
+void SendCreds(int sock, ucred creds, char buf[], int buf_size);
+
+// SendCredsAndFD sends the credentials of the current process, a single FD, and
+// some data over a unix domain socket specified by an FD. Note that calls to
+// this function must be wrapped in ASSERT_NO_FATAL_FAILURE for internal
+// assertions to halt the test.
+void SendCredsAndFD(int sock, ucred creds, int fd, char buf[], int buf_size);
+
+// RecvCreds receives some credentials and some data over a unix domain socket
+// specified by an FD. Note that calls to this function must be wrapped in
+// ASSERT_NO_FATAL_FAILURE for internal assertions to halt the test.
+void RecvCreds(int sock, ucred* creds, char buf[], int buf_size);
+
+// RecvCreds receives some credentials and some data over a unix domain socket
+// specified by an FD. This version allows the expected amount of data received
+// to be different than the buffer size. Note that calls to this function must
+// be wrapped in ASSERT_NO_FATAL_FAILURE for internal assertions to halt the
+// test.
+void RecvCreds(int sock, ucred* creds, char buf[], int buf_size,
+ int expected_size);
+
+// RecvCredsAndFD receives some credentials, a single FD, and some data over a
+// unix domain socket specified by an FD. Note that calls to this function must
+// be wrapped in ASSERT_NO_FATAL_FAILURE for internal assertions to halt the
+// test.
+void RecvCredsAndFD(int sock, ucred* creds, int* fd, char buf[], int buf_size);
+
+// SendNullCmsg sends a null control message and some data over a unix domain
+// socket specified by an FD. Note that calls to this function must be wrapped
+// in ASSERT_NO_FATAL_FAILURE for internal assertions to halt the test.
+void SendNullCmsg(int sock, char buf[], int buf_size);
+
+// RecvSingleFDUnaligned sends both a single FD and some data over a unix domain
+// socket specified by an FD. This function does not obey the spec, but Linux
+// allows it and the apphosting code depends on this quirk. Note that calls to
+// this function must be wrapped in ASSERT_NO_FATAL_FAILURE for internal
+// assertions to halt the test.
+void RecvSingleFDUnaligned(int sock, int* fd, char buf[], int buf_size);
+
+// SetSoPassCred sets the SO_PASSCRED option on the specified socket.
+void SetSoPassCred(int sock);
+
+// UnsetSoPassCred clears the SO_PASSCRED option on the specified socket.
+void UnsetSoPassCred(int sock);
+
+} // namespace testing
+} // namespace gvisor
+
+#endif // GVISOR_TEST_SYSCALLS_UNIX_DOMAIN_SOCKET_TEST_UTIL_H_
diff --git a/test/syscalls/linux/unlink.cc b/test/syscalls/linux/unlink.cc
new file mode 100644
index 000000000..4d5e0c6b6
--- /dev/null
+++ b/test/syscalls/linux/unlink.cc
@@ -0,0 +1,211 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "absl/strings/str_cat.h"
+#include "test/util/capability_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(UnlinkTest, IsDir) {
+ auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+ EXPECT_THAT(unlink(dir.path().c_str()), SyscallFailsWithErrno(EISDIR));
+}
+
+TEST(UnlinkTest, DirNotEmpty) {
+ auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+ int fd;
+ std::string path = JoinPath(dir.path(), "ExistingFile");
+ EXPECT_THAT(fd = open(path.c_str(), O_RDWR | O_CREAT, 0666),
+ SyscallSucceeds());
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+ EXPECT_THAT(rmdir(dir.path().c_str()), SyscallFailsWithErrno(ENOTEMPTY));
+}
+
+TEST(UnlinkTest, Rmdir) {
+ std::string path = JoinPath(GetAbsoluteTestTmpdir(), "NewDir");
+ ASSERT_THAT(mkdir(path.c_str(), 0755), SyscallSucceeds());
+ EXPECT_THAT(rmdir(path.c_str()), SyscallSucceeds());
+}
+
+TEST(UnlinkTest, AtDir) {
+ int dirfd;
+ EXPECT_THAT(dirfd = open(GetAbsoluteTestTmpdir().c_str(), O_DIRECTORY, 0),
+ SyscallSucceeds());
+
+ std::string path = JoinPath(GetAbsoluteTestTmpdir(), "NewDir");
+ EXPECT_THAT(mkdir(path.c_str(), 0755), SyscallSucceeds());
+ EXPECT_THAT(unlinkat(dirfd, "NewDir", AT_REMOVEDIR), SyscallSucceeds());
+ ASSERT_THAT(close(dirfd), SyscallSucceeds());
+}
+
+TEST(UnlinkTest, AtDirDegradedPermissions_NoRandomSave) {
+ // Drop capabilities that allow us to override file and directory permissions.
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+ auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+ int dirfd;
+ ASSERT_THAT(dirfd = open(dir.path().c_str(), O_DIRECTORY, 0),
+ SyscallSucceeds());
+
+ std::string sub_dir = JoinPath(dir.path(), "NewDir");
+ EXPECT_THAT(mkdir(sub_dir.c_str(), 0755), SyscallSucceeds());
+ EXPECT_THAT(fchmod(dirfd, 0444), SyscallSucceeds());
+ EXPECT_THAT(unlinkat(dirfd, "NewDir", AT_REMOVEDIR),
+ SyscallFailsWithErrno(EACCES));
+ ASSERT_THAT(close(dirfd), SyscallSucceeds());
+}
+
+// Files cannot be unlinked if the parent is not writable and executable.
+TEST(UnlinkTest, ParentDegradedPermissions) {
+ // Drop capabilities that allow us to override file and directory permissions.
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false));
+ ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false));
+
+ auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir.path()));
+
+ ASSERT_THAT(chmod(dir.path().c_str(), 0000), SyscallSucceeds());
+
+ struct stat st;
+ ASSERT_THAT(stat(file.path().c_str(), &st), SyscallFailsWithErrno(EACCES));
+ ASSERT_THAT(unlinkat(AT_FDCWD, file.path().c_str(), 0),
+ SyscallFailsWithErrno(EACCES));
+
+ // Non-existent files also return EACCES.
+ const std::string nonexist = JoinPath(dir.path(), "doesnotexist");
+ ASSERT_THAT(stat(nonexist.c_str(), &st), SyscallFailsWithErrno(EACCES));
+ ASSERT_THAT(unlinkat(AT_FDCWD, nonexist.c_str(), 0),
+ SyscallFailsWithErrno(EACCES));
+}
+
+TEST(UnlinkTest, AtBad) {
+ int dirfd;
+ EXPECT_THAT(dirfd = open(GetAbsoluteTestTmpdir().c_str(), O_DIRECTORY, 0),
+ SyscallSucceeds());
+
+ // Try removing a directory as a file.
+ std::string path = JoinPath(GetAbsoluteTestTmpdir(), "NewDir");
+ EXPECT_THAT(mkdir(path.c_str(), 0755), SyscallSucceeds());
+ EXPECT_THAT(unlinkat(dirfd, "NewDir", 0), SyscallFailsWithErrno(EISDIR));
+ EXPECT_THAT(unlinkat(dirfd, "NewDir", AT_REMOVEDIR), SyscallSucceeds());
+
+ // Try removing a file as a directory.
+ int fd;
+ EXPECT_THAT(fd = openat(dirfd, "UnlinkAtFile", O_RDWR | O_CREAT, 0666),
+ SyscallSucceeds());
+ EXPECT_THAT(unlinkat(dirfd, "UnlinkAtFile", AT_REMOVEDIR),
+ SyscallFailsWithErrno(ENOTDIR));
+ ASSERT_THAT(close(fd), SyscallSucceeds());
+ EXPECT_THAT(unlinkat(dirfd, "UnlinkAtFile", 0), SyscallSucceeds());
+
+ // Cleanup.
+ ASSERT_THAT(close(dirfd), SyscallSucceeds());
+}
+
+TEST(UnlinkTest, AbsTmpFile) {
+ int fd;
+ std::string path = JoinPath(GetAbsoluteTestTmpdir(), "ExistingFile");
+ EXPECT_THAT(fd = open(path.c_str(), O_RDWR | O_CREAT, 0666),
+ SyscallSucceeds());
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+ EXPECT_THAT(unlink(path.c_str()), SyscallSucceeds());
+}
+
+TEST(UnlinkTest, TooLongName) {
+ EXPECT_THAT(unlink(std::vector<char>(16384, '0').data()),
+ SyscallFailsWithErrno(ENAMETOOLONG));
+}
+
+TEST(UnlinkTest, BadNamePtr) {
+ EXPECT_THAT(unlink(reinterpret_cast<char*>(1)),
+ SyscallFailsWithErrno(EFAULT));
+}
+
+TEST(UnlinkTest, AtFile) {
+ int dirfd;
+ EXPECT_THAT(dirfd = open(GetAbsoluteTestTmpdir().c_str(), O_DIRECTORY, 0666),
+ SyscallSucceeds());
+ int fd;
+ EXPECT_THAT(fd = openat(dirfd, "UnlinkAtFile", O_RDWR | O_CREAT, 0666),
+ SyscallSucceeds());
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+ EXPECT_THAT(unlinkat(dirfd, "UnlinkAtFile", 0), SyscallSucceeds());
+}
+
+TEST(UnlinkTest, OpenFile) {
+ // We can't save unlinked file unless they are on tmpfs.
+ const DisableSave ds;
+ auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ int fd;
+ EXPECT_THAT(fd = open(file.path().c_str(), O_RDWR, 0666), SyscallSucceeds());
+ EXPECT_THAT(unlink(file.path().c_str()), SyscallSucceeds());
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+}
+
+TEST(UnlinkTest, CannotRemoveDots) {
+ auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ const std::string self = JoinPath(file.path(), ".");
+ ASSERT_THAT(unlink(self.c_str()), SyscallFailsWithErrno(ENOTDIR));
+ const std::string parent = JoinPath(file.path(), "..");
+ ASSERT_THAT(unlink(parent.c_str()), SyscallFailsWithErrno(ENOTDIR));
+}
+
+TEST(UnlinkTest, CannotRemoveRoot) {
+ ASSERT_THAT(unlinkat(-1, "/", AT_REMOVEDIR), SyscallFailsWithErrno(EBUSY));
+}
+
+TEST(UnlinkTest, CannotRemoveRootWithAtDir) {
+ const FileDescriptor dirfd = ASSERT_NO_ERRNO_AND_VALUE(
+ Open(GetAbsoluteTestTmpdir(), O_DIRECTORY, 0666));
+ ASSERT_THAT(unlinkat(dirfd.get(), "/", AT_REMOVEDIR),
+ SyscallFailsWithErrno(EBUSY));
+}
+
+TEST(RmdirTest, CannotRemoveDots) {
+ auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const std::string self = JoinPath(dir.path(), ".");
+ ASSERT_THAT(rmdir(self.c_str()), SyscallFailsWithErrno(EINVAL));
+ const std::string parent = JoinPath(dir.path(), "..");
+ ASSERT_THAT(rmdir(parent.c_str()), SyscallFailsWithErrno(ENOTEMPTY));
+}
+
+TEST(RmdirTest, CanRemoveWithTrailingSlashes) {
+ auto dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const std::string slash = absl::StrCat(dir1.path(), "/");
+ ASSERT_THAT(rmdir(slash.c_str()), SyscallSucceeds());
+ auto dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const std::string slashslash = absl::StrCat(dir2.path(), "//");
+ ASSERT_THAT(rmdir(slashslash.c_str()), SyscallSucceeds());
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/unshare.cc b/test/syscalls/linux/unshare.cc
new file mode 100644
index 000000000..9dd6ec4b6
--- /dev/null
+++ b/test/syscalls/linux/unshare.cc
@@ -0,0 +1,50 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <sched.h>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/synchronization/mutex.h"
+#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+TEST(UnshareTest, AllowsZeroFlags) {
+ ASSERT_THAT(unshare(0), SyscallSucceeds());
+}
+
+TEST(UnshareTest, ThreadFlagFailsIfMultithreaded) {
+ absl::Mutex mu;
+ bool finished = false;
+ ScopedThread t([&] {
+ mu.Lock();
+ mu.Await(absl::Condition(&finished));
+ mu.Unlock();
+ });
+ ASSERT_THAT(unshare(CLONE_THREAD), SyscallFailsWithErrno(EINVAL));
+ mu.Lock();
+ finished = true;
+ mu.Unlock();
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/utimes.cc b/test/syscalls/linux/utimes.cc
new file mode 100644
index 000000000..d95ee74ec
--- /dev/null
+++ b/test/syscalls/linux/utimes.cc
@@ -0,0 +1,330 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+#include <utime.h>
+#include <string>
+
+#include "absl/time/time.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/fs_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// TODO: utimes(nullptr) does not pick the "now" time in the
+// application's time domain, so when asserting that times are within a window,
+// we expand the window to allow for differences between the time domains.
+constexpr absl::Duration kClockSlack = absl::Milliseconds(100);
+
+// TimeBoxed runs fn, setting before and after to (coarse realtime) times
+// guaranteed* to come before and after fn started and completed, respectively.
+//
+// fn may be called more than once if the clock is adjusted.
+//
+// * See the comment on kClockSlack. gVisor breaks this guarantee.
+void TimeBoxed(absl::Time* before, absl::Time* after,
+ std::function<void()> const& fn) {
+ do {
+ // N.B. utimes and friends use CLOCK_REALTIME_COARSE for setting time (i.e.,
+ // current_kernel_time()). See fs/attr.c:notify_change.
+ //
+ // notify_change truncates the time to a multiple of s_time_gran, but most
+ // filesystems set it to 1, so we don't do any truncation.
+ struct timespec ts;
+ EXPECT_THAT(clock_gettime(CLOCK_REALTIME_COARSE, &ts), SyscallSucceeds());
+ *before = absl::TimeFromTimespec(ts);
+
+ fn();
+
+ EXPECT_THAT(clock_gettime(CLOCK_REALTIME_COARSE, &ts), SyscallSucceeds());
+ *after = absl::TimeFromTimespec(ts);
+
+ if (*after < *before) {
+ // Clock jumped backwards; retry.
+ //
+ // Technically this misses jumps small enough to keep after > before,
+ // which could lead to test failures, but that is very unlikely to happen.
+ continue;
+ }
+
+ if (IsRunningOnGvisor()) {
+ // See comment on kClockSlack.
+ *before -= kClockSlack;
+ *after += kClockSlack;
+ }
+ } while (*after < *before);
+}
+
+void TestUtimesOnPath(std::string const& path) {
+ struct stat statbuf;
+
+ struct timeval times[2] = {{1, 0}, {2, 0}};
+ EXPECT_THAT(utimes(path.c_str(), times), SyscallSucceeds());
+ EXPECT_THAT(stat(path.c_str(), &statbuf), SyscallSucceeds());
+ EXPECT_EQ(1, statbuf.st_atime);
+ EXPECT_EQ(2, statbuf.st_mtime);
+
+ absl::Time before;
+ absl::Time after;
+ TimeBoxed(&before, &after, [&] {
+ EXPECT_THAT(utimes(path.c_str(), nullptr), SyscallSucceeds());
+ });
+
+ EXPECT_THAT(stat(path.c_str(), &statbuf), SyscallSucceeds());
+
+ absl::Time atime = absl::TimeFromTimespec(statbuf.st_atim);
+ EXPECT_GE(atime, before);
+ EXPECT_LE(atime, after);
+
+ absl::Time mtime = absl::TimeFromTimespec(statbuf.st_mtim);
+ EXPECT_GE(mtime, before);
+ EXPECT_LE(mtime, after);
+}
+
+TEST(UtimesTest, OnFile) {
+ auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ TestUtimesOnPath(f.path());
+}
+
+TEST(UtimesTest, OnDir) {
+ auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ TestUtimesOnPath(dir.path());
+}
+
+TEST(UtimesTest, MissingPath) {
+ auto path = NewTempAbsPath();
+ struct timeval times[2] = {{1, 0}, {2, 0}};
+ EXPECT_THAT(utimes(path.c_str(), times), SyscallFailsWithErrno(ENOENT));
+}
+
+void TestFutimesat(int dirFd, std::string const& path) {
+ struct stat statbuf;
+
+ struct timeval times[2] = {{1, 0}, {2, 0}};
+ EXPECT_THAT(futimesat(dirFd, path.c_str(), times), SyscallSucceeds());
+ EXPECT_THAT(fstatat(dirFd, path.c_str(), &statbuf, 0), SyscallSucceeds());
+ EXPECT_EQ(1, statbuf.st_atime);
+ EXPECT_EQ(2, statbuf.st_mtime);
+
+ absl::Time before;
+ absl::Time after;
+ TimeBoxed(&before, &after, [&] {
+ EXPECT_THAT(futimesat(dirFd, path.c_str(), nullptr), SyscallSucceeds());
+ });
+
+ EXPECT_THAT(fstatat(dirFd, path.c_str(), &statbuf, 0), SyscallSucceeds());
+
+ absl::Time atime = absl::TimeFromTimespec(statbuf.st_atim);
+ EXPECT_GE(atime, before);
+ EXPECT_LE(atime, after);
+
+ absl::Time mtime = absl::TimeFromTimespec(statbuf.st_mtim);
+ EXPECT_GE(mtime, before);
+ EXPECT_LE(mtime, after);
+}
+
+TEST(FutimesatTest, OnAbsPath) {
+ auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ TestFutimesat(0, f.path());
+}
+
+TEST(FutimesatTest, OnRelPath) {
+ auto d = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(d.path()));
+ auto basename = std::string(Basename(f.path()));
+ const FileDescriptor dirFd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(d.path(), O_RDONLY | O_DIRECTORY));
+ TestFutimesat(dirFd.get(), basename);
+}
+
+TEST(FutimesatTest, InvalidNsec) {
+ auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ struct timeval times[4][2] = {{
+ {0, 1}, // Valid
+ {1, static_cast<int64_t>(1e7)} // Invalid
+ },
+ {
+ {1, static_cast<int64_t>(1e7)}, // Invalid
+ {0, 1} // Valid
+ },
+ {
+ {0, 1}, // Valid
+ {1, -1} // Invalid
+ },
+ {
+ {1, -1}, // Invalid
+ {0, 1} // Valid
+ }};
+
+ for (unsigned int i = 0; i < sizeof(times) / sizeof(times[0]); i++) {
+ std::cout << "test:" << i << "\n";
+ EXPECT_THAT(futimesat(0, f.path().c_str(), times[i]),
+ SyscallFailsWithErrno(EINVAL));
+ }
+}
+
+void TestUtimensat(int dirFd, std::string const& path) {
+ struct stat statbuf;
+ const struct timespec times[2] = {{1, 0}, {2, 0}};
+ EXPECT_THAT(utimensat(dirFd, path.c_str(), times, 0), SyscallSucceeds());
+ EXPECT_THAT(fstatat(dirFd, path.c_str(), &statbuf, 0), SyscallSucceeds());
+ EXPECT_EQ(1, statbuf.st_atime);
+ EXPECT_EQ(2, statbuf.st_mtime);
+
+ // Test setting with UTIME_NOW and UTIME_OMIT.
+ struct stat statbuf2;
+ const struct timespec times2[2] = {
+ {0, UTIME_NOW}, // Should set atime to now.
+ {0, UTIME_OMIT} // Should not change mtime.
+ };
+
+ absl::Time before;
+ absl::Time after;
+ TimeBoxed(&before, &after, [&] {
+ EXPECT_THAT(utimensat(dirFd, path.c_str(), times2, 0), SyscallSucceeds());
+ });
+
+ EXPECT_THAT(fstatat(dirFd, path.c_str(), &statbuf2, 0), SyscallSucceeds());
+
+ absl::Time atime2 = absl::TimeFromTimespec(statbuf2.st_atim);
+ EXPECT_GE(atime2, before);
+ EXPECT_LE(atime2, after);
+
+ absl::Time mtime = absl::TimeFromTimespec(statbuf.st_mtim);
+ absl::Time mtime2 = absl::TimeFromTimespec(statbuf2.st_mtim);
+ // mtime should not be changed.
+ EXPECT_EQ(mtime, mtime2);
+
+ // Test setting with times = NULL. Should set both atime and mtime to the
+ // current system time.
+ struct stat statbuf3;
+ TimeBoxed(&before, &after, [&] {
+ EXPECT_THAT(utimensat(dirFd, path.c_str(), nullptr, 0), SyscallSucceeds());
+ });
+
+ EXPECT_THAT(fstatat(dirFd, path.c_str(), &statbuf3, 0), SyscallSucceeds());
+
+ absl::Time atime3 = absl::TimeFromTimespec(statbuf3.st_atim);
+ EXPECT_GE(atime3, before);
+ EXPECT_LE(atime3, after);
+
+ absl::Time mtime3 = absl::TimeFromTimespec(statbuf3.st_mtim);
+ EXPECT_GE(mtime3, before);
+ EXPECT_LE(mtime3, after);
+
+ if (!IsRunningOnGvisor()) {
+ // FIXME: Gofers set atime and mtime to different "now" times.
+ EXPECT_EQ(atime3, mtime3);
+ }
+}
+
+TEST(UtimensatTest, OnAbsPath) {
+ auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ TestUtimensat(0, f.path());
+}
+
+TEST(UtimensatTest, OnRelPath) {
+ auto d = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(d.path()));
+ auto basename = std::string(Basename(f.path()));
+ const FileDescriptor dirFd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(d.path(), O_RDONLY | O_DIRECTORY));
+ TestUtimensat(dirFd.get(), basename);
+}
+
+TEST(UtimensatTest, OmitNoop) {
+ // Setting both timespecs to UTIME_OMIT on a nonexistant path should succeed.
+ auto path = NewTempAbsPath();
+ const struct timespec times[2] = {{0, UTIME_OMIT}, {0, UTIME_OMIT}};
+ EXPECT_THAT(utimensat(0, path.c_str(), times, 0), SyscallSucceeds());
+}
+
+// Verify that we can actually set atime and mtime to 0.
+TEST(UtimeTest, ZeroAtimeandMtime) {
+ const auto tmp_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ const auto tmp_file =
+ ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(tmp_dir.path()));
+
+ // Stat the file before and after updating atime and mtime.
+ struct stat stat_before = {};
+ EXPECT_THAT(stat(tmp_file.path().c_str(), &stat_before), SyscallSucceeds());
+
+ ASSERT_NE(stat_before.st_atime, 0);
+ ASSERT_NE(stat_before.st_mtime, 0);
+
+ const struct utimbuf times = {}; // Zero for both atime and mtime.
+ EXPECT_THAT(utime(tmp_file.path().c_str(), &times), SyscallSucceeds());
+
+ struct stat stat_after = {};
+ EXPECT_THAT(stat(tmp_file.path().c_str(), &stat_after), SyscallSucceeds());
+
+ // We should see the atime and mtime changed when we set them to 0.
+ ASSERT_EQ(stat_after.st_atime, 0);
+ ASSERT_EQ(stat_after.st_mtime, 0);
+}
+
+TEST(UtimensatTest, InvalidNsec) {
+ auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ struct timespec times[2][2] = {{
+ {0, UTIME_OMIT}, // Valid
+ {2, static_cast<int64_t>(1e10)} // Invalid
+ },
+ {
+ {2, static_cast<int64_t>(1e10)}, // Invalid
+ {0, UTIME_OMIT} // Valid
+ }};
+
+ for (unsigned int i = 0; i < sizeof(times) / sizeof(times[0]); i++) {
+ std::cout << "test:" << i << "\n";
+ EXPECT_THAT(utimensat(0, f.path().c_str(), times[i], 0),
+ SyscallFailsWithErrno(EINVAL));
+ }
+}
+
+TEST(Utimensat, NullPath) {
+ // From man utimensat(2):
+ // "the Linux utimensat() system call implements a nonstandard feature: if
+ // pathname is NULL, then the call modifies the timestamps of the file
+ // referred to by the file descriptor dirfd (which may refer to any type of
+ // file).
+ // Note, however, that the glibc wrapper for utimensat() disallows
+ // passing NULL as the value for file: the wrapper function returns the error
+ // EINVAL in this case."
+ auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDWR));
+ struct stat statbuf;
+ const struct timespec times[2] = {{1, 0}, {2, 0}};
+ // Call syscall directly.
+ EXPECT_THAT(syscall(SYS_utimensat, fd.get(), NULL, times, 0),
+ SyscallSucceeds());
+ EXPECT_THAT(fstatat(0, f.path().c_str(), &statbuf, 0), SyscallSucceeds());
+ EXPECT_EQ(1, statbuf.st_atime);
+ EXPECT_EQ(2, statbuf.st_mtime);
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/vdso.cc b/test/syscalls/linux/vdso.cc
new file mode 100644
index 000000000..0f6e1c7c6
--- /dev/null
+++ b/test/syscalls/linux/vdso.cc
@@ -0,0 +1,48 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string.h>
+#include <sys/mman.h>
+
+#include <algorithm>
+
+#include "gtest/gtest.h"
+#include "test/util/fs_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/proc_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Ensure that the vvar page cannot be made writable.
+TEST(VvarTest, WriteVvar) {
+ auto contents = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps"));
+ auto maps = ASSERT_NO_ERRNO_AND_VALUE(ParseProcMaps(contents));
+ auto it = std::find_if(maps.begin(), maps.end(), [](const ProcMapsEntry& e) {
+ return e.filename == "[vvar]";
+ });
+
+ SKIP_IF(it == maps.end());
+ EXPECT_THAT(mprotect(reinterpret_cast<void*>(it->start), kPageSize,
+ PROT_READ | PROT_WRITE),
+ SyscallFailsWithErrno(EACCES));
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/vdso_clock_gettime.cc b/test/syscalls/linux/vdso_clock_gettime.cc
new file mode 100644
index 000000000..59dd78833
--- /dev/null
+++ b/test/syscalls/linux/vdso_clock_gettime.cc
@@ -0,0 +1,104 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdint.h>
+#include <sys/time.h>
+#include <syscall.h>
+#include <time.h>
+#include <unistd.h>
+#include <map>
+#include <string>
+#include <utility>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/strings/numbers.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+std::string PrintClockId(::testing::TestParamInfo<clockid_t> info) {
+ switch (info.param) {
+ case CLOCK_MONOTONIC:
+ return "CLOCK_MONOTONIC";
+ case CLOCK_REALTIME:
+ return "CLOCK_REALTIME";
+ default:
+ return absl::StrCat(info.param);
+ }
+}
+
+class CorrectVDSOClockTest : public ::testing::TestWithParam<clockid_t> {};
+
+TEST_P(CorrectVDSOClockTest, IsCorrect) {
+ struct timespec tvdso, tsys;
+ absl::Time vdso_time, sys_time;
+ uint64_t total_calls = 0;
+
+ // It is expected that 82.5% of clock_gettime calls will be less than 100us
+ // skewed from the system time.
+ // Unfortunately this is not only influenced by the VDSO clock skew, but also
+ // by arbitrary scheduling delays and the like. The test is therefore
+ // regularly disabled.
+ std::map<absl::Duration, std::tuple<double, uint64_t, uint64_t>> confidence =
+ {
+ {absl::Microseconds(100), std::make_tuple(0.825, 0, 0)},
+ {absl::Microseconds(250), std::make_tuple(0.94, 0, 0)},
+ {absl::Milliseconds(1), std::make_tuple(0.999, 0, 0)},
+ };
+
+ absl::Time start = absl::Now();
+ while (absl::Now() < start + absl::Seconds(30)) {
+ EXPECT_THAT(clock_gettime(GetParam(), &tvdso), SyscallSucceeds());
+ EXPECT_THAT(syscall(__NR_clock_gettime, GetParam(), &tsys),
+ SyscallSucceeds());
+
+ vdso_time = absl::TimeFromTimespec(tvdso);
+
+ for (auto const& conf : confidence) {
+ std::get<1>(confidence[conf.first]) +=
+ (sys_time - vdso_time) < conf.first;
+ }
+
+ sys_time = absl::TimeFromTimespec(tsys);
+
+ for (auto const& conf : confidence) {
+ std::get<2>(confidence[conf.first]) +=
+ (vdso_time - sys_time) < conf.first;
+ }
+
+ ++total_calls;
+ }
+
+ for (auto const& conf : confidence) {
+ EXPECT_GE(std::get<1>(conf.second) / static_cast<double>(total_calls),
+ std::get<0>(conf.second));
+ EXPECT_GE(std::get<2>(conf.second) / static_cast<double>(total_calls),
+ std::get<0>(conf.second));
+ }
+}
+
+INSTANTIATE_TEST_CASE_P(ClockGettime, CorrectVDSOClockTest,
+ ::testing::Values(CLOCK_MONOTONIC, CLOCK_REALTIME),
+ PrintClockId);
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/vfork.cc b/test/syscalls/linux/vfork.cc
new file mode 100644
index 000000000..9999a909e
--- /dev/null
+++ b/test/syscalls/linux/vfork.cc
@@ -0,0 +1,193 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include <string>
+#include <utility>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/time/time.h"
+#include "test/util/logging.h"
+#include "test/util/multiprocess_util.h"
+#include "test/util/test_util.h"
+
+DEFINE_bool(vfork_test_child, false,
+ "If true, run the VforkTest child workload.");
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// We don't test with raw CLONE_VFORK to avoid interacting with glibc's use of
+// TLS.
+//
+// Even with vfork(2), we must be careful to do little more in the child than
+// call execve(2). We use the simplest sleep function possible, though this is
+// still precarious, as we're officially only allowed to call execve(2) and
+// _exit(2).
+constexpr absl::Duration kChildDelay = absl::Seconds(10);
+
+// Exit code for successful child subprocesses. We don't want to use 0 since
+// it's too common, and an execve(2) failure causes the child to exit with the
+// errno, so kChildExitCode is chosen to be an unlikely errno:
+constexpr int kChildExitCode = 118; // ENOTNAM: Not a XENIX named type file
+
+int64_t MonotonicNow() {
+ struct timespec now;
+ TEST_PCHECK(clock_gettime(CLOCK_MONOTONIC, &now) == 0);
+ return now.tv_sec * 1000000000ll + now.tv_nsec;
+}
+
+TEST(VforkTest, ParentStopsUntilChildExits) {
+ const auto test = [] {
+ // N.B. Run the test in a single-threaded subprocess because
+ // vfork is not safe in a multi-threaded process.
+
+ const int64_t start = MonotonicNow();
+
+ pid_t pid = vfork();
+ if (pid == 0) {
+ SleepSafe(kChildDelay);
+ _exit(kChildExitCode);
+ }
+ TEST_PCHECK_MSG(pid > 0, "vfork failed");
+ MaybeSave();
+
+ const int64_t end = MonotonicNow();
+
+ absl::Duration dur = absl::Nanoseconds(end - start);
+
+ TEST_CHECK(dur >= kChildDelay);
+
+ int status = 0;
+ TEST_PCHECK(RetryEINTR(waitpid)(pid, &status, 0));
+ TEST_CHECK(WIFEXITED(status));
+ TEST_CHECK(WEXITSTATUS(status) == kChildExitCode);
+ };
+
+ EXPECT_THAT(InForkedProcess(test), IsPosixErrorOkAndHolds(0));
+}
+
+TEST(VforkTest, ParentStopsUntilChildExecves_NoRandomSave) {
+ ExecveArray const owned_child_argv = {"/proc/self/exe", "--vfork_test_child"};
+ char* const* const child_argv = owned_child_argv.get();
+
+ const auto test = [&] {
+ const int64_t start = MonotonicNow();
+
+ pid_t pid = vfork();
+ if (pid == 0) {
+ SleepSafe(kChildDelay);
+ execve(child_argv[0], child_argv, /* envp = */ nullptr);
+ _exit(errno);
+ }
+ // Don't attempt save/restore until after recording end_time,
+ // since the test expects an upper bound on the time spent
+ // stopped.
+ int saved_errno = errno;
+ const int64_t end = MonotonicNow();
+ errno = saved_errno;
+ TEST_PCHECK_MSG(pid > 0, "vfork failed");
+ MaybeSave();
+
+ absl::Duration dur = absl::Nanoseconds(end - start);
+
+ // The parent should resume execution after execve, but before
+ // the post-execve test child exits.
+ TEST_CHECK(dur >= kChildDelay);
+ TEST_CHECK(dur <= 2 * kChildDelay);
+
+ int status = 0;
+ TEST_PCHECK(RetryEINTR(waitpid)(pid, &status, 0));
+ TEST_CHECK(WIFEXITED(status));
+ TEST_CHECK(WEXITSTATUS(status) == kChildExitCode);
+ };
+
+ EXPECT_THAT(InForkedProcess(test), IsPosixErrorOkAndHolds(0));
+}
+
+// A vfork child does not unstop the parent a second time when it exits after
+// exec.
+TEST(VforkTest, ExecedChildExitDoesntUnstopParent_NoRandomSave) {
+ ExecveArray const owned_child_argv = {"/proc/self/exe", "--vfork_test_child"};
+ char* const* const child_argv = owned_child_argv.get();
+
+ const auto test = [&] {
+ pid_t pid1 = vfork();
+ if (pid1 == 0) {
+ execve(child_argv[0], child_argv, /* envp = */ nullptr);
+ _exit(errno);
+ }
+ TEST_PCHECK_MSG(pid1 > 0, "vfork failed");
+ MaybeSave();
+
+ // pid1 exec'd and is now sleeping.
+ SleepSafe(kChildDelay / 2);
+
+ const int64_t start = MonotonicNow();
+
+ pid_t pid2 = vfork();
+ if (pid2 == 0) {
+ SleepSafe(kChildDelay);
+ _exit(kChildExitCode);
+ }
+ TEST_PCHECK_MSG(pid2 > 0, "vfork failed");
+ MaybeSave();
+
+ const int64_t end = MonotonicNow();
+
+ absl::Duration dur = absl::Nanoseconds(end - start);
+
+ // The parent should resume execution only after pid2 exits, not
+ // when pid1 exits.
+ TEST_CHECK(dur >= kChildDelay);
+
+ int status = 0;
+ TEST_PCHECK(RetryEINTR(waitpid)(pid1, &status, 0));
+ TEST_CHECK(WIFEXITED(status));
+ TEST_CHECK(WEXITSTATUS(status) == kChildExitCode);
+
+ TEST_PCHECK(RetryEINTR(waitpid)(pid2, &status, 0));
+ TEST_CHECK(WIFEXITED(status));
+ TEST_CHECK(WEXITSTATUS(status) == kChildExitCode);
+ };
+
+ EXPECT_THAT(InForkedProcess(test), IsPosixErrorOkAndHolds(0));
+}
+
+int RunChild() {
+ SleepSafe(kChildDelay);
+ return kChildExitCode;
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
+
+int main(int argc, char** argv) {
+ gvisor::testing::TestInit(&argc, &argv);
+
+ if (FLAGS_vfork_test_child) {
+ return gvisor::testing::RunChild();
+ }
+
+ return RUN_ALL_TESTS();
+}
diff --git a/test/syscalls/linux/vsyscall.cc b/test/syscalls/linux/vsyscall.cc
new file mode 100644
index 000000000..cb6840cc6
--- /dev/null
+++ b/test/syscalls/linux/vsyscall.cc
@@ -0,0 +1,44 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <time.h>
+
+#include "gtest/gtest.h"
+#include "test/util/proc_util.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+time_t vsyscall_time(time_t* t) {
+ constexpr uint64_t kVsyscallTimeEntry = 0xffffffffff600400;
+ return reinterpret_cast<time_t (*)(time_t*)>(kVsyscallTimeEntry)(t);
+}
+
+TEST(VsyscallTest, VsyscallAlwaysAvailableOnGvisor) {
+ SKIP_IF(!IsRunningOnGvisor());
+ // Vsyscall is always advertised by gvisor.
+ EXPECT_TRUE(ASSERT_NO_ERRNO_AND_VALUE(IsVsyscallEnabled()));
+ // Vsyscall should always works on gvisor.
+ time_t t;
+ EXPECT_THAT(vsyscall_time(&t), SyscallSucceeds());
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/wait.cc b/test/syscalls/linux/wait.cc
new file mode 100644
index 000000000..0a4ec7c6a
--- /dev/null
+++ b/test/syscalls/linux/wait.cc
@@ -0,0 +1,748 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <signal.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include <functional>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/strings/str_cat.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "test/util/cleanup.h"
+#include "test/util/logging.h"
+#include "test/util/multiprocess_util.h"
+#include "test/util/posix_error.h"
+#include "test/util/signal_util.h"
+#include "test/util/test_util.h"
+
+using ::testing::UnorderedElementsAre;
+
+// These unit tests focus on the wait4(2) system call, but include a basic
+// checks for the i386 waitpid(2) syscall, which is a subset of wait4(2).
+//
+// NOTE: Some functionality is not tested as
+// it is not currently supported by gVisor:
+// * UID in waitid(2) siginfo.
+// * Process groups.
+// * Core dump status (WCOREDUMP).
+// * Linux only option __WNOTHREAD.
+//
+// Tests for waiting on stopped/continued children are in sigstop.cc.
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// The CloneChild function seems to need more than one page of stack space.
+static const size_t kStackSize = 2 * kPageSize;
+
+// The child thread created in CloneAndExit runs this function.
+// This child does not have the TLS setup, so it must not use glibc functions.
+int CloneChild(void* priv) {
+ int64_t sleep = reinterpret_cast<int64_t>(priv);
+ SleepSafe(absl::Seconds(sleep));
+
+ // glibc's _exit(2) function wrapper will helpfully call exit_group(2),
+ // exiting the entire process.
+ syscall(__NR_exit, 0);
+ return 1;
+}
+
+// ForkAndExit forks a child process which exits with exit_code, after
+// sleeping for the specified duration (seconds).
+pid_t ForkAndExit(int exit_code, int64_t sleep) {
+ pid_t child = fork();
+ if (child == 0) {
+ SleepSafe(absl::Seconds(sleep));
+ _exit(exit_code);
+ }
+ return child;
+}
+
+int64_t clock_gettime_nsecs(clockid_t id) {
+ struct timespec ts;
+ TEST_PCHECK(clock_gettime(id, &ts) == 0);
+ return (ts.tv_sec * 1000000000 + ts.tv_nsec);
+}
+
+void spin(int64_t sec) {
+ int64_t ns = sec * 1000000000;
+ int64_t start = clock_gettime_nsecs(CLOCK_THREAD_CPUTIME_ID);
+ int64_t end = start + ns;
+
+ do {
+ constexpr int kLoopCount = 1000000; // large and arbitrary
+ // volatile to prevent the compiler from skipping this loop.
+ for (volatile int i = 0; i < kLoopCount; i++) {
+ }
+ } while (clock_gettime_nsecs(CLOCK_THREAD_CPUTIME_ID) < end);
+}
+
+// ForkSpinAndExit forks a child process which exits with exit_code, after
+// spinning for the specified duration (seconds).
+pid_t ForkSpinAndExit(int exit_code, int64_t spintime) {
+ pid_t child = fork();
+ if (child == 0) {
+ spin(spintime);
+ _exit(exit_code);
+ }
+ return child;
+}
+
+absl::Duration RusageCpuTime(const struct rusage& ru) {
+ return absl::DurationFromTimeval(ru.ru_utime) +
+ absl::DurationFromTimeval(ru.ru_stime);
+}
+
+// Returns the address of the top of the stack.
+// Free with FreeStack.
+uintptr_t AllocStack() {
+ void* addr = mmap(nullptr, kStackSize, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+ if (addr == MAP_FAILED) {
+ return reinterpret_cast<uintptr_t>(MAP_FAILED);
+ }
+
+ return reinterpret_cast<uintptr_t>(addr) + kStackSize;
+}
+
+// Frees a stack page allocated with AllocStack.
+int FreeStack(uintptr_t addr) {
+ addr -= kStackSize;
+ return munmap(reinterpret_cast<void*>(addr), kPageSize);
+}
+
+// CloneAndExit clones a child thread, which exits with 0 after sleeping for
+// the specified duration (must be in seconds). extra_flags are ORed against
+// the standard clone(2) flags.
+int CloneAndExit(int64_t sleep, uintptr_t stack, int extra_flags) {
+ return clone(CloneChild, reinterpret_cast<void*>(stack),
+ CLONE_FILES | CLONE_FS | CLONE_SIGHAND | CLONE_VM | extra_flags,
+ reinterpret_cast<void*>(sleep));
+}
+
+// Simple wrappers around wait4(2) and waitid(2) that ignore interrupts.
+constexpr auto Wait4 = RetryEINTR(wait4);
+constexpr auto Waitid = RetryEINTR(waitid);
+
+// Fixture for tests parameterized by a function that waits for any child to
+// exit with the given options, checks that it exited with the given code, and
+// then returns its PID.
+//
+// N.B. These tests run in a multi-threaded environment. We assume that
+// background threads do not create child processes and are not themselves
+// created with clone(... | SIGCHLD). Either may cause these tests to
+// erroneously wait on child processes/threads.
+class WaitAnyChildTest : public ::testing::TestWithParam<
+ std::function<PosixErrorOr<pid_t>(int, int)>> {
+ protected:
+ PosixErrorOr<pid_t> WaitAny(int code) { return WaitAnyWithOptions(code, 0); }
+
+ PosixErrorOr<pid_t> WaitAnyWithOptions(int code, int options) {
+ return GetParam()(code, options);
+ }
+};
+
+// Wait for any child to exit.
+TEST_P(WaitAnyChildTest, Fork) {
+ pid_t child;
+ ASSERT_THAT(child = ForkAndExit(0, 0), SyscallSucceeds());
+
+ EXPECT_THAT(WaitAny(0), IsPosixErrorOkAndHolds(child));
+}
+
+// Call wait4 for any process after the child has already exited.
+TEST_P(WaitAnyChildTest, AfterExit) {
+ pid_t child;
+ ASSERT_THAT(child = ForkAndExit(0, 0), SyscallSucceeds());
+
+ absl::SleepFor(absl::Seconds(5));
+
+ EXPECT_THAT(WaitAny(0), IsPosixErrorOkAndHolds(child));
+}
+
+// Wait for multiple children to exit, waiting for either at a time.
+TEST_P(WaitAnyChildTest, MultipleFork) {
+ pid_t child1, child2;
+ ASSERT_THAT(child1 = ForkAndExit(0, 0), SyscallSucceeds());
+ ASSERT_THAT(child2 = ForkAndExit(0, 0), SyscallSucceeds());
+
+ std::vector<pid_t> pids;
+ pids.push_back(ASSERT_NO_ERRNO_AND_VALUE(WaitAny(0)));
+ pids.push_back(ASSERT_NO_ERRNO_AND_VALUE(WaitAny(0)));
+ EXPECT_THAT(pids, UnorderedElementsAre(child1, child2));
+}
+
+// Wait for any child to exit.
+// A non-CLONE_THREAD child which sends SIGCHLD upon exit behaves much like
+// a forked process.
+TEST_P(WaitAnyChildTest, CloneSIGCHLD) {
+ uintptr_t stack;
+ ASSERT_THAT(stack = AllocStack(), SyscallSucceeds());
+ auto free = Cleanup(
+ [this, stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); });
+
+ int child;
+ ASSERT_THAT(child = CloneAndExit(0, stack, SIGCHLD), SyscallSucceeds());
+
+ EXPECT_THAT(WaitAny(0), IsPosixErrorOkAndHolds(child));
+}
+
+// Wait for a child thread and process.
+TEST_P(WaitAnyChildTest, ForkAndClone) {
+ pid_t process;
+ ASSERT_THAT(process = ForkAndExit(0, 0), SyscallSucceeds());
+
+ uintptr_t stack;
+ ASSERT_THAT(stack = AllocStack(), SyscallSucceeds());
+ auto free = Cleanup(
+ [this, stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); });
+
+ int thread;
+ // Send SIGCHLD for normal wait semantics.
+ ASSERT_THAT(thread = CloneAndExit(0, stack, SIGCHLD), SyscallSucceeds());
+
+ std::vector<pid_t> pids;
+ pids.push_back(ASSERT_NO_ERRNO_AND_VALUE(WaitAny(0)));
+ pids.push_back(ASSERT_NO_ERRNO_AND_VALUE(WaitAny(0)));
+ EXPECT_THAT(pids, UnorderedElementsAre(process, thread));
+}
+
+// Return immediately if no child has exited.
+TEST_P(WaitAnyChildTest, WaitWNOHANG) {
+ EXPECT_THAT(
+ WaitAnyWithOptions(0, WNOHANG),
+ PosixErrorIs(ECHILD, ::testing::AnyOf(::testing::StrEq("waitid"),
+ ::testing::StrEq("wait4"))));
+}
+
+// Bad options passed
+TEST_P(WaitAnyChildTest, BadOption) {
+ EXPECT_THAT(
+ WaitAnyWithOptions(0, 123456),
+ PosixErrorIs(EINVAL, ::testing::AnyOf(::testing::StrEq("waitid"),
+ ::testing::StrEq("wait4"))));
+}
+
+TEST_P(WaitAnyChildTest, WaitedChildRusage) {
+ struct rusage before;
+ ASSERT_THAT(getrusage(RUSAGE_CHILDREN, &before), SyscallSucceeds());
+
+ pid_t child;
+ constexpr absl::Duration kSpin = absl::Seconds(3);
+ ASSERT_THAT(child = ForkSpinAndExit(0, absl::ToInt64Seconds(kSpin)),
+ SyscallSucceeds());
+ ASSERT_THAT(WaitAny(0), IsPosixErrorOkAndHolds(child));
+
+ struct rusage after;
+ ASSERT_THAT(getrusage(RUSAGE_CHILDREN, &after), SyscallSucceeds());
+
+ EXPECT_GE(RusageCpuTime(after) - RusageCpuTime(before), kSpin);
+}
+
+TEST_P(WaitAnyChildTest, IgnoredChildRusage) {
+ // "POSIX.1-2001 specifies that if the disposition of SIGCHLD is
+ // set to SIG_IGN or the SA_NOCLDWAIT flag is set for SIGCHLD (see
+ // sigaction(2)), then children that terminate do not become zombies and a
+ // call to wait() or waitpid() will block until all children have terminated,
+ // and then fail with errno set to ECHILD." - waitpid(2)
+ //
+ // "RUSAGE_CHILDREN: Return resource usage statistics for all children of the
+ // calling process that have terminated *and been waited for*." -
+ // getrusage(2), emphasis added
+
+ struct sigaction sa;
+ sa.sa_handler = SIG_IGN;
+ const auto cleanup_sigact =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGCHLD, sa));
+
+ struct rusage before;
+ ASSERT_THAT(getrusage(RUSAGE_CHILDREN, &before), SyscallSucceeds());
+
+ const absl::Duration start =
+ absl::Nanoseconds(clock_gettime_nsecs(CLOCK_MONOTONIC));
+
+ constexpr absl::Duration kSpin = absl::Seconds(3);
+
+ // ForkAndSpin uses CLOCK_THREAD_CPUTIME_ID, which is lower resolution than,
+ // and may diverge from, CLOCK_MONOTONIC, so we allow a small grace period but
+ // still check that we blocked for a while.
+ constexpr absl::Duration kSpinGrace = absl::Milliseconds(100);
+
+ pid_t child;
+ ASSERT_THAT(child = ForkSpinAndExit(0, absl::ToInt64Seconds(kSpin)),
+ SyscallSucceeds());
+ ASSERT_THAT(WaitAny(0), PosixErrorIs(ECHILD, ::testing::AnyOf(
+ ::testing::StrEq("waitid"),
+ ::testing::StrEq("wait4"))));
+ const absl::Duration end =
+ absl::Nanoseconds(clock_gettime_nsecs(CLOCK_MONOTONIC));
+ EXPECT_GE(end - start, kSpin - kSpinGrace);
+
+ struct rusage after;
+ ASSERT_THAT(getrusage(RUSAGE_CHILDREN, &after), SyscallSucceeds());
+ EXPECT_EQ(before.ru_utime.tv_sec, after.ru_utime.tv_sec);
+ EXPECT_EQ(before.ru_utime.tv_usec, after.ru_utime.tv_usec);
+ EXPECT_EQ(before.ru_stime.tv_sec, after.ru_stime.tv_sec);
+ EXPECT_EQ(before.ru_stime.tv_usec, after.ru_stime.tv_usec);
+}
+
+INSTANTIATE_TEST_CASE_P(
+ Waiters, WaitAnyChildTest,
+ ::testing::Values(
+ [](int code, int options) -> PosixErrorOr<pid_t> {
+ int status;
+ auto const pid = Wait4(-1, &status, options, nullptr);
+ MaybeSave();
+ if (pid < 0) {
+ return PosixError(errno, "wait4");
+ }
+ if (!WIFEXITED(status) || WEXITSTATUS(status) != code) {
+ return PosixError(
+ EINVAL, absl::StrCat("unexpected wait status: got ", status,
+ ", wanted ", code));
+ }
+ return static_cast<pid_t>(pid);
+ },
+ [](int code, int options) -> PosixErrorOr<pid_t> {
+ siginfo_t si;
+ auto const rv = Waitid(P_ALL, 0, &si, WEXITED | options);
+ MaybeSave();
+ if (rv < 0) {
+ return PosixError(errno, "waitid");
+ }
+ if (si.si_signo != SIGCHLD) {
+ return PosixError(
+ EINVAL, absl::StrCat("unexpected signo: got ", si.si_signo,
+ ", wanted ", SIGCHLD));
+ }
+ if (si.si_status != code) {
+ return PosixError(
+ EINVAL, absl::StrCat("unexpected status: got ", si.si_status,
+ ", wanted ", code));
+ }
+ if (si.si_code != CLD_EXITED) {
+ return PosixError(EINVAL,
+ absl::StrCat("unexpected code: got ", si.si_code,
+ ", wanted ", CLD_EXITED));
+ }
+ auto const uid = getuid();
+ if (si.si_uid != uid) {
+ return PosixError(EINVAL,
+ absl::StrCat("unexpected uid: got ", si.si_uid,
+ ", wanted ", uid));
+ }
+ return static_cast<pid_t>(si.si_pid);
+ }));
+
+// Fixture for tests parameterized by a function that takes the PID of a
+// specific child to wait for, waits for it to exit, and checks that it exits
+// with the given code.
+class WaitSpecificChildTest
+ : public ::testing::TestWithParam<std::function<PosixError(pid_t, int)>> {
+ protected:
+ PosixError WaitFor(pid_t pid, int code) { return GetParam()(pid, code); }
+};
+
+// Wait for specific child to exit.
+TEST_P(WaitSpecificChildTest, Fork) {
+ pid_t child;
+ ASSERT_THAT(child = ForkAndExit(0, 0), SyscallSucceeds());
+
+ EXPECT_NO_ERRNO(WaitFor(child, 0));
+}
+
+// Non-zero exit codes are correctly propagated.
+TEST_P(WaitSpecificChildTest, NormalExit) {
+ pid_t child;
+ ASSERT_THAT(child = ForkAndExit(42, 0), SyscallSucceeds());
+
+ EXPECT_NO_ERRNO(WaitFor(child, 42));
+}
+
+// Wait for multiple children to exit.
+TEST_P(WaitSpecificChildTest, MultipleFork) {
+ pid_t child1, child2;
+ ASSERT_THAT(child1 = ForkAndExit(0, 0), SyscallSucceeds());
+ ASSERT_THAT(child2 = ForkAndExit(0, 0), SyscallSucceeds());
+
+ EXPECT_NO_ERRNO(WaitFor(child1, 0));
+ EXPECT_NO_ERRNO(WaitFor(child2, 0));
+}
+
+// Wait for multiple children to exit, out of the order they were created.
+TEST_P(WaitSpecificChildTest, MultipleForkOutOfOrder) {
+ pid_t child1, child2;
+ ASSERT_THAT(child1 = ForkAndExit(0, 0), SyscallSucceeds());
+ ASSERT_THAT(child2 = ForkAndExit(0, 0), SyscallSucceeds());
+
+ EXPECT_NO_ERRNO(WaitFor(child2, 0));
+ EXPECT_NO_ERRNO(WaitFor(child1, 0));
+}
+
+// Wait for specific child to exit, entering wait4 before the exit occurs.
+TEST_P(WaitSpecificChildTest, ForkSleep) {
+ pid_t child;
+ ASSERT_THAT(child = ForkAndExit(0, 5), SyscallSucceeds());
+
+ EXPECT_NO_ERRNO(WaitFor(child, 0));
+}
+
+// Wait should block until the child exits.
+TEST_P(WaitSpecificChildTest, ForkBlock) {
+ pid_t child;
+
+ auto start = absl::Now();
+ ASSERT_THAT(child = ForkAndExit(0, 5), SyscallSucceeds());
+
+ EXPECT_NO_ERRNO(WaitFor(child, 0));
+
+ EXPECT_GE(absl::Now() - start, absl::Seconds(5));
+}
+
+// Waiting after the child has already exited returns immediately.
+TEST_P(WaitSpecificChildTest, AfterExit) {
+ pid_t child;
+ ASSERT_THAT(child = ForkAndExit(0, 0), SyscallSucceeds());
+
+ absl::SleepFor(absl::Seconds(5));
+
+ EXPECT_NO_ERRNO(WaitFor(child, 0));
+}
+
+// Wait for specific child to exit.
+// A non-CLONE_THREAD child which sends SIGCHLD upon exit behaves much like
+// a forked process.
+TEST_P(WaitSpecificChildTest, CloneSIGCHLD) {
+ uintptr_t stack;
+ ASSERT_THAT(stack = AllocStack(), SyscallSucceeds());
+ auto free = Cleanup(
+ [this, stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); });
+
+ int child;
+ ASSERT_THAT(child = CloneAndExit(0, stack, SIGCHLD), SyscallSucceeds());
+
+ EXPECT_NO_ERRNO(WaitFor(child, 0));
+}
+
+// Wait for specific child to exit.
+// A non-CLONE_THREAD child which does not send SIGCHLD upon exit can be waited
+// on, but returns ECHILD.
+TEST_P(WaitSpecificChildTest, CloneNoSIGCHLD) {
+ uintptr_t stack;
+ ASSERT_THAT(stack = AllocStack(), SyscallSucceeds());
+ auto free = Cleanup(
+ [this, stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); });
+
+ int child;
+ ASSERT_THAT(child = CloneAndExit(0, stack, 0), SyscallSucceeds());
+
+ EXPECT_THAT(
+ WaitFor(child, 0),
+ PosixErrorIs(ECHILD, ::testing::AnyOf(::testing::StrEq("waitid"),
+ ::testing::StrEq("wait4"))));
+}
+
+// Waiting after the child has already exited returns immediately.
+TEST_P(WaitSpecificChildTest, CloneAfterExit) {
+ uintptr_t stack;
+ ASSERT_THAT(stack = AllocStack(), SyscallSucceeds());
+ auto free = Cleanup(
+ [this, stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); });
+
+ int child;
+ // Send SIGCHLD for normal wait semantics.
+ ASSERT_THAT(child = CloneAndExit(0, stack, SIGCHLD), SyscallSucceeds());
+
+ absl::SleepFor(absl::Seconds(5));
+
+ EXPECT_NO_ERRNO(WaitFor(child, 0));
+}
+
+// A CLONE_THREAD child cannot be waited on.
+TEST_P(WaitSpecificChildTest, CloneThread) {
+ uintptr_t stack;
+ ASSERT_THAT(stack = AllocStack(), SyscallSucceeds());
+ auto free = Cleanup(
+ [this, stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); });
+
+ int child;
+ ASSERT_THAT(child = CloneAndExit(15, stack, CLONE_THREAD), SyscallSucceeds());
+ auto start = absl::Now();
+
+ EXPECT_THAT(
+ WaitFor(child, 0),
+ PosixErrorIs(ECHILD, ::testing::AnyOf(::testing::StrEq("waitid"),
+ ::testing::StrEq("wait4"))));
+
+ // Ensure wait4 didn't block.
+ EXPECT_LE(absl::Now() - start, absl::Seconds(10));
+
+ // Since we can't wait on the child, we sleep to try to avoid freeing its
+ // stack before it exits.
+ absl::SleepFor(absl::Seconds(5));
+}
+
+// Return ECHILD for bad child.
+TEST_P(WaitSpecificChildTest, BadChild) {
+ EXPECT_THAT(
+ WaitFor(42, 0),
+ PosixErrorIs(ECHILD, ::testing::AnyOf(::testing::StrEq("waitid"),
+ ::testing::StrEq("wait4"))));
+}
+
+// Wait for a child process that only exits after calling execve(2) from a
+// non-leader thread.
+TEST_P(WaitSpecificChildTest, AfterChildExecve) {
+ ExecveArray const owned_child_argv = {"/bin/true"};
+ char* const* const child_argv = owned_child_argv.get();
+
+ uintptr_t stack;
+ ASSERT_THAT(stack = AllocStack(), SyscallSucceeds());
+ auto free = Cleanup(
+ [this, stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); });
+
+ pid_t const child = fork();
+ if (child == 0) {
+ // Give the parent some time to start waiting.
+ SleepSafe(absl::Seconds(5));
+ // Pass CLONE_VFORK to block the original thread in the child process until
+ // the clone thread calls execve, annihilating them both. (This means that
+ // if clone returns at all, something went wrong.)
+ //
+ // N.B. clone(2) is not officially async-signal-safe, but at minimum glibc's
+ // x86_64 implementation is safe. See glibc
+ // sysdeps/unix/sysv/linux/x86_64/clone.S.
+ clone(
+ +[](void* arg) {
+ auto child_argv = static_cast<char* const*>(arg);
+ execve(child_argv[0], child_argv, /* envp = */ nullptr);
+ return errno;
+ },
+ reinterpret_cast<void*>(stack),
+ CLONE_FILES | CLONE_FS | CLONE_SIGHAND | CLONE_THREAD | CLONE_VM |
+ CLONE_VFORK,
+ const_cast<char**>(child_argv));
+ _exit(errno);
+ }
+ EXPECT_NO_ERRNO(WaitFor(child, 0));
+}
+
+INSTANTIATE_TEST_CASE_P(
+ Waiters, WaitSpecificChildTest,
+ ::testing::Values(
+ [](pid_t pid, int code) -> PosixError {
+ int status;
+ auto const rv = Wait4(pid, &status, 0, nullptr);
+ MaybeSave();
+ if (rv < 0) {
+ return PosixError(errno, "wait4");
+ } else if (rv != pid) {
+ return PosixError(EINVAL, absl::StrCat("unexpected pid: got ", rv,
+ ", wanted ", pid));
+ }
+ if (!WIFEXITED(status) || WEXITSTATUS(status) != code) {
+ return PosixError(
+ EINVAL, absl::StrCat("unexpected wait status: got ", status,
+ ", wanted ", code));
+ }
+ return NoError();
+ },
+ [](pid_t pid, int code) -> PosixError {
+ siginfo_t si;
+ auto const rv = Waitid(P_PID, pid, &si, WEXITED);
+ MaybeSave();
+ if (rv < 0) {
+ return PosixError(errno, "waitid");
+ }
+ if (si.si_pid != pid) {
+ return PosixError(EINVAL,
+ absl::StrCat("unexpected pid: got ", si.si_pid,
+ ", wanted ", pid));
+ }
+ if (si.si_signo != SIGCHLD) {
+ return PosixError(
+ EINVAL, absl::StrCat("unexpected signo: got ", si.si_signo,
+ ", wanted ", SIGCHLD));
+ }
+ if (si.si_status != code) {
+ return PosixError(
+ EINVAL, absl::StrCat("unexpected status: got ", si.si_status,
+ ", wanted ", code));
+ }
+ if (si.si_code != CLD_EXITED) {
+ return PosixError(EINVAL,
+ absl::StrCat("unexpected code: got ", si.si_code,
+ ", wanted ", CLD_EXITED));
+ }
+ return NoError();
+ }));
+
+// WIFEXITED, WIFSIGNALED, WTERMSIG indicate signal exit.
+TEST(WaitTest, SignalExit) {
+ pid_t child;
+ ASSERT_THAT(child = ForkAndExit(0, 10), SyscallSucceeds());
+
+ EXPECT_THAT(kill(child, SIGKILL), SyscallSucceeds());
+
+ int status;
+ EXPECT_THAT(Wait4(child, &status, 0, nullptr),
+ SyscallSucceedsWithValue(child));
+
+ EXPECT_FALSE(WIFEXITED(status));
+ EXPECT_TRUE(WIFSIGNALED(status));
+ EXPECT_EQ(SIGKILL, WTERMSIG(status));
+}
+
+// A child that does not send a SIGCHLD on exit may be waited on with
+// the __WCLONE flag.
+TEST(WaitTest, CloneWCLONE) {
+ uintptr_t stack;
+ ASSERT_THAT(stack = AllocStack(), SyscallSucceeds());
+ auto free = Cleanup(
+ [this, stack] { ASSERT_THAT(FreeStack(stack), SyscallSucceeds()); });
+
+ int child;
+ ASSERT_THAT(child = CloneAndExit(0, stack, 0), SyscallSucceeds());
+
+ EXPECT_THAT(Wait4(child, nullptr, __WCLONE, nullptr),
+ SyscallSucceedsWithValue(child));
+}
+
+// waitid requires at least one option.
+TEST(WaitTest, WaitidOptions) {
+ EXPECT_THAT(Waitid(P_ALL, 0, nullptr, 0), SyscallFailsWithErrno(EINVAL));
+}
+
+// waitid does not wait for a child to exit if not passed WEXITED.
+TEST(WaitTest, WaitidNoWEXITED) {
+ pid_t child;
+ ASSERT_THAT(child = ForkAndExit(0, 0), SyscallSucceeds());
+ EXPECT_THAT(Waitid(P_ALL, 0, nullptr, WSTOPPED),
+ SyscallFailsWithErrno(ECHILD));
+ EXPECT_THAT(Waitid(P_ALL, 0, nullptr, WEXITED), SyscallSucceeds());
+}
+
+// WNOWAIT allows the same wait result to be returned again.
+TEST(WaitTest, WaitidWNOWAIT) {
+ pid_t child;
+ ASSERT_THAT(child = ForkAndExit(42, 0), SyscallSucceeds());
+
+ siginfo_t info;
+ ASSERT_THAT(Waitid(P_PID, child, &info, WEXITED | WNOWAIT),
+ SyscallSucceeds());
+ EXPECT_EQ(child, info.si_pid);
+ EXPECT_EQ(SIGCHLD, info.si_signo);
+ EXPECT_EQ(CLD_EXITED, info.si_code);
+ EXPECT_EQ(42, info.si_status);
+
+ ASSERT_THAT(Waitid(P_PID, child, &info, WEXITED), SyscallSucceeds());
+ EXPECT_EQ(child, info.si_pid);
+ EXPECT_EQ(SIGCHLD, info.si_signo);
+ EXPECT_EQ(CLD_EXITED, info.si_code);
+ EXPECT_EQ(42, info.si_status);
+
+ EXPECT_THAT(Waitid(P_PID, child, &info, WEXITED),
+ SyscallFailsWithErrno(ECHILD));
+}
+
+// waitpid(pid, status, options) is equivalent to
+// wait4(pid, status, options, nullptr).
+// This is a dedicated syscall on i386, glibc maps it to wait4 on amd64.
+TEST(WaitTest, WaitPid) {
+ pid_t child;
+ ASSERT_THAT(child = ForkAndExit(42, 0), SyscallSucceeds());
+
+ int status;
+ EXPECT_THAT(RetryEINTR(waitpid)(child, &status, 0),
+ SyscallSucceedsWithValue(child));
+
+ EXPECT_TRUE(WIFEXITED(status));
+ EXPECT_EQ(42, WEXITSTATUS(status));
+}
+
+// Test that signaling a zombie succeeds. This is a signals test that is in this
+// file for some reason.
+TEST(WaitTest, KillZombie) {
+ pid_t child;
+ ASSERT_THAT(child = ForkAndExit(42, 0), SyscallSucceeds());
+
+ // Sleep for three seconds to ensure the child has exited.
+ absl::SleepFor(absl::Seconds(3));
+
+ // The child is now a zombie. Check that killing it returns 0.
+ EXPECT_THAT(kill(child, SIGTERM), SyscallSucceeds());
+ EXPECT_THAT(kill(child, 0), SyscallSucceeds());
+
+ EXPECT_THAT(Wait4(child, nullptr, 0, nullptr),
+ SyscallSucceedsWithValue(child));
+}
+
+TEST(WaitTest, Wait4Rusage) {
+ pid_t child;
+ constexpr absl::Duration kSpin = absl::Seconds(3);
+ ASSERT_THAT(child = ForkSpinAndExit(21, absl::ToInt64Seconds(kSpin)),
+ SyscallSucceeds());
+
+ int status;
+ struct rusage rusage = {};
+ ASSERT_THAT(Wait4(child, &status, 0, &rusage),
+ SyscallSucceedsWithValue(child));
+
+ EXPECT_TRUE(WIFEXITED(status));
+ EXPECT_EQ(21, WEXITSTATUS(status));
+
+ EXPECT_GE(RusageCpuTime(rusage), kSpin);
+}
+
+TEST(WaitTest, WaitidRusage) {
+ pid_t child;
+ constexpr absl::Duration kSpin = absl::Seconds(3);
+ ASSERT_THAT(child = ForkSpinAndExit(27, absl::ToInt64Seconds(kSpin)),
+ SyscallSucceeds());
+
+ siginfo_t si = {};
+ struct rusage rusage = {};
+
+ // From waitid(2):
+ // The raw waitid() system call takes a fifth argument, of type
+ // struct rusage *. If this argument is non-NULL, then it is used
+ // to return resource usage information about the child, in the
+ // same manner as wait4(2).
+ EXPECT_THAT(
+ RetryEINTR(syscall)(SYS_waitid, P_PID, child, &si, WEXITED, &rusage),
+ SyscallSucceeds());
+ EXPECT_EQ(si.si_signo, SIGCHLD);
+ EXPECT_EQ(si.si_code, CLD_EXITED);
+ EXPECT_EQ(si.si_status, 27);
+ EXPECT_EQ(si.si_pid, child);
+
+ EXPECT_GE(RusageCpuTime(rusage), kSpin);
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/write.cc b/test/syscalls/linux/write.cc
new file mode 100644
index 000000000..ca6aafd18
--- /dev/null
+++ b/test/syscalls/linux/write.cc
@@ -0,0 +1,134 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "test/util/cleanup.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+// This test is currently very rudimentary.
+//
+// TODO:
+// * bad buffer states (EFAULT).
+// * bad fds (wrong permission, wrong type of file, EBADF).
+// * check offset is incremented.
+// * check for EOF.
+// * writing to pipes, symlinks, special files.
+class WriteTest : public ::testing::Test {
+ public:
+ ssize_t WriteBytes(int fd, int bytes) {
+ std::vector<char> buf(bytes);
+ std::fill(buf.begin(), buf.end(), 'a');
+ return WriteFd(fd, buf.data(), buf.size());
+ }
+};
+
+TEST_F(WriteTest, WriteNoExceedsRLimit) {
+ // Get the current rlimit and restore after test run.
+ struct rlimit initial_lim;
+ ASSERT_THAT(getrlimit(RLIMIT_FSIZE, &initial_lim), SyscallSucceeds());
+ auto cleanup = Cleanup([&initial_lim] {
+ EXPECT_THAT(setrlimit(RLIMIT_FSIZE, &initial_lim), SyscallSucceeds());
+ });
+
+ int fd;
+ struct rlimit setlim;
+ const int target_lim = 1024;
+ setlim.rlim_cur = target_lim;
+ setlim.rlim_max = RLIM_INFINITY;
+ const std::string pathname = NewTempAbsPath();
+ ASSERT_THAT(fd = open(pathname.c_str(), O_WRONLY | O_CREAT, S_IRWXU),
+ SyscallSucceeds());
+ ASSERT_THAT(setrlimit(RLIMIT_FSIZE, &setlim), SyscallSucceeds());
+
+ EXPECT_THAT(WriteBytes(fd, target_lim), SyscallSucceedsWithValue(target_lim));
+
+ std::vector<char> buf(target_lim + 1);
+ std::fill(buf.begin(), buf.end(), 'a');
+ EXPECT_THAT(pwrite(fd, buf.data(), target_lim, 1), SyscallSucceeds());
+ EXPECT_THAT(pwrite64(fd, buf.data(), target_lim, 1), SyscallSucceeds());
+
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+}
+
+TEST_F(WriteTest, WriteExceedsRLimit) {
+ // Get the current rlimit and restore after test run.
+ struct rlimit initial_lim;
+ ASSERT_THAT(getrlimit(RLIMIT_FSIZE, &initial_lim), SyscallSucceeds());
+ auto cleanup = Cleanup([&initial_lim] {
+ EXPECT_THAT(setrlimit(RLIMIT_FSIZE, &initial_lim), SyscallSucceeds());
+ });
+
+ int fd;
+ sigset_t filesize_mask;
+ sigemptyset(&filesize_mask);
+ sigaddset(&filesize_mask, SIGXFSZ);
+
+ struct rlimit setlim;
+ const int target_lim = 1024;
+ setlim.rlim_cur = target_lim;
+ setlim.rlim_max = RLIM_INFINITY;
+
+ const std::string pathname = NewTempAbsPath();
+ ASSERT_THAT(fd = open(pathname.c_str(), O_WRONLY | O_CREAT, S_IRWXU),
+ SyscallSucceeds());
+ ASSERT_THAT(setrlimit(RLIMIT_FSIZE, &setlim), SyscallSucceeds());
+ ASSERT_THAT(sigprocmask(SIG_BLOCK, &filesize_mask, nullptr),
+ SyscallSucceeds());
+ std::vector<char> buf(target_lim + 2);
+ std::fill(buf.begin(), buf.end(), 'a');
+
+ EXPECT_THAT(write(fd, buf.data(), target_lim + 1),
+ SyscallSucceedsWithValue(target_lim));
+ EXPECT_THAT(write(fd, buf.data(), 1), SyscallFailsWithErrno(EFBIG));
+ struct timespec timelimit = {0, 0};
+ EXPECT_THAT(RetryEINTR(sigtimedwait)(&filesize_mask, nullptr, &timelimit),
+ SyscallSucceedsWithValue(SIGXFSZ));
+
+ EXPECT_THAT(pwrite(fd, buf.data(), target_lim + 1, 1),
+ SyscallSucceedsWithValue(target_lim - 1));
+ EXPECT_THAT(pwrite(fd, buf.data(), 1, target_lim),
+ SyscallFailsWithErrno(EFBIG));
+ EXPECT_THAT(RetryEINTR(sigtimedwait)(&filesize_mask, nullptr, &timelimit),
+ SyscallSucceedsWithValue(SIGXFSZ));
+
+ EXPECT_THAT(pwrite64(fd, buf.data(), target_lim + 1, 1),
+ SyscallSucceedsWithValue(target_lim - 1));
+ EXPECT_THAT(pwrite64(fd, buf.data(), 1, target_lim),
+ SyscallFailsWithErrno(EFBIG));
+ EXPECT_THAT(RetryEINTR(sigtimedwait)(&filesize_mask, nullptr, &timelimit),
+ SyscallSucceedsWithValue(SIGXFSZ));
+
+ ASSERT_THAT(sigprocmask(SIG_UNBLOCK, &filesize_mask, nullptr),
+ SyscallSucceeds());
+ EXPECT_THAT(close(fd), SyscallSucceeds());
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/syscall_test.go b/test/syscalls/syscall_test.go
new file mode 100644
index 000000000..8463289fe
--- /dev/null
+++ b/test/syscalls/syscall_test.go
@@ -0,0 +1,245 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package syscall_test runs the syscall test suites in gVisor containers. It
+// is meant to be run with "go test", and will panic if run on its own.
+package syscall_test
+
+import (
+ "flag"
+ "fmt"
+ "io/ioutil"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "strings"
+ "syscall"
+ "testing"
+
+ "golang.org/x/sys/unix"
+ "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.googlesource.com/gvisor/runsc/boot"
+ "gvisor.googlesource.com/gvisor/runsc/container"
+ "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.googlesource.com/gvisor/runsc/test/testutil"
+ "gvisor.googlesource.com/gvisor/test/syscalls/gtest"
+)
+
+// Location of syscall tests, relative to the repo root.
+const testDir = "test/syscalls/linux"
+
+var (
+ testName = flag.String("test-name", "", "name of test binary to run")
+ debug = flag.Bool("debug", false, "enable debug logs")
+ strace = flag.Bool("strace", false, "enable strace logs")
+ platform = flag.String("platform", "ptrace", "platform to run on")
+ parallel = flag.Bool("parallel", false, "run tests in parallel")
+)
+
+func TestSyscalls(t *testing.T) {
+ if *testName == "" {
+ t.Fatalf("test-name flag must be provided")
+ }
+
+ // Get path to test binary.
+ fullTestName := filepath.Join(testDir, *testName)
+ testBin, err := testutil.FindFile(fullTestName)
+ if err != nil {
+ t.Fatalf("FindFile(%q) failed: %v", fullTestName, err)
+ }
+
+ // Get all test cases in each binary.
+ testCases, err := gtest.ParseTestCases(testBin)
+ if err != nil {
+ t.Fatalf("ParseTestCases(%q) failed: %v", testBin, err)
+ }
+
+ // Make sure stdout and stderr are opened with O_APPEND, otherwise logs
+ // from outside the sandbox can (and will) stomp on logs from inside
+ // the sandbox.
+ for _, f := range []*os.File{os.Stdout, os.Stderr} {
+ flags, err := unix.FcntlInt(f.Fd(), unix.F_GETFL, 0)
+ if err != nil {
+ t.Fatalf("error getting file flags for %v: %v", f, err)
+ }
+ if flags&unix.O_APPEND == 0 {
+ flags |= unix.O_APPEND
+ if _, err := unix.FcntlInt(f.Fd(), unix.F_SETFL, flags); err != nil {
+ t.Fatalf("error setting file flags for %v: %v", f, err)
+ }
+ }
+ }
+
+ for _, tc := range testCases {
+ // Capture tc.
+ tc := tc
+
+ testName := fmt.Sprintf("%s_%s", tc.Suite, tc.Name)
+ t.Run(testName, func(t *testing.T) {
+ if *parallel {
+ t.Parallel()
+ }
+
+ if *platform == "native" {
+ // Run the test case on host.
+ runTestCaseNative(testBin, tc, t)
+ return
+ }
+
+ // Run the test case in runsc.
+ runTestCaseRunsc(testBin, tc, t)
+ })
+ }
+}
+
+// runTestCaseNative runs the test case directly on the host machine.
+func runTestCaseNative(testBin string, tc gtest.TestCase, t *testing.T) {
+ // These tests might be running in parallel, so make sure they have a
+ // unique test temp dir.
+ tmpDir, err := ioutil.TempDir(testutil.TmpDir(), "")
+ if err != nil {
+ t.Fatalf("could not create temp dir: %v", err)
+ }
+ defer os.RemoveAll(tmpDir)
+
+ // Replace TEST_TMPDIR in the current environment with something
+ // unique.
+ env := os.Environ()
+ newEnvVar := "TEST_TMPDIR=" + tmpDir
+ var found bool
+ for i, kv := range env {
+ if strings.HasPrefix(kv, "TEST_TMPDIR=") {
+ env[i] = newEnvVar
+ found = true
+ break
+ }
+ }
+ if !found {
+ env = append(env, newEnvVar)
+ }
+ // Remove the TEST_PREMATURE_EXIT_FILE variable and XML_OUTPUT_FILE
+ // from the environment.
+ env = filterEnv(env, []string{"TEST_PREMATURE_EXIT_FILE", "XML_OUTPUT_FILE"})
+
+ cmd := exec.Command(testBin, gtest.FilterTestFlag+"="+tc.FullName())
+ cmd.Env = env
+ cmd.Stdout = os.Stdout
+ cmd.Stderr = os.Stderr
+ if err := cmd.Run(); err != nil {
+ ws := err.(*exec.ExitError).Sys().(syscall.WaitStatus)
+ t.Errorf("test %q exited with status %d, want 0", tc.FullName(), ws.ExitStatus())
+ }
+}
+
+// runsTestCaseRunsc runs the test case in runsc.
+func runTestCaseRunsc(testBin string, tc gtest.TestCase, t *testing.T) {
+ rootDir, err := testutil.SetupRootDir()
+ if err != nil {
+ t.Fatalf("SetupRootDir failed: %v", err)
+ }
+ defer os.RemoveAll(rootDir)
+
+ conf := testutil.TestConfig()
+ conf.RootDir = rootDir
+ conf.Debug = *debug
+ conf.Strace = *strace
+ p, err := boot.MakePlatformType(*platform)
+ if err != nil {
+ t.Fatalf("error getting platform %q: %v", *platform, err)
+ }
+ conf.Platform = p
+
+ // Run a new container with the test executable and filter for the
+ // given test suite and name.
+ spec := testutil.NewSpecWithArgs(testBin, gtest.FilterTestFlag+"="+tc.FullName())
+
+ // Mark the root as writeable, as some tests attempt to
+ // write to the rootfs, and expect EACCES, not EROFS.
+ spec.Root.Readonly = false
+
+ // Set environment variable that indicates we are
+ // running in gVisor and with the given platform.
+ platformVar := "TEST_ON_GVISOR"
+ env := append(os.Environ(), platformVar+"="+*platform)
+
+ // Remove the TEST_PREMATURE_EXIT_FILE variable and XML_OUTPUT_FILE
+ // from the environment.
+ env = filterEnv(env, []string{"TEST_PREMATURE_EXIT_FILE", "XML_OUTPUT_FILE"})
+
+ // Set TEST_TMPDIR to /tmp, as some of the syscall tests require it to
+ // be backed by tmpfs.
+ for i, kv := range env {
+ if strings.HasPrefix(kv, "TEST_TMPDIR=") {
+ env[i] = "TEST_TMPDIR=/tmp"
+ break
+ }
+ }
+
+ spec.Process.Env = env
+
+ bundleDir, err := testutil.SetupBundleDir(spec)
+ if err != nil {
+ t.Fatalf("SetupBundleDir failed: %v", err)
+ }
+ defer os.RemoveAll(bundleDir)
+
+ id := testutil.UniqueContainerID()
+ log.Infof("Running test %q in container %q", tc.FullName(), id)
+ specutils.LogSpec(spec)
+ ws, err := container.Run(id, spec, conf, bundleDir, "", "", "")
+ if err != nil {
+ t.Fatalf("container.Run failed: %v", err)
+ }
+ if got := ws.ExitStatus(); got != 0 {
+ t.Errorf("test %q exited with status %d, want 0", tc.FullName(), ws.ExitStatus())
+ }
+}
+
+// filterEnv returns an environment with the blacklisted variables removed.
+func filterEnv(env, blacklist []string) []string {
+ var out []string
+ for _, kv := range env {
+ ok := true
+ for _, k := range blacklist {
+ if strings.HasPrefix(kv, k+"=") {
+ ok = false
+ break
+ }
+ }
+ if ok {
+ out = append(out, kv)
+ }
+ }
+ return out
+}
+
+func TestMain(m *testing.M) {
+ flag.Parse()
+
+ log.SetLevel(log.Warning)
+ if *debug {
+ log.SetLevel(log.Debug)
+ }
+ if err := testutil.ConfigureExePath(); err != nil {
+ panic(err.Error())
+ }
+
+ if *platform != "native" {
+ // The native tests don't expect to be running as root, but
+ // runsc requires it.
+ testutil.RunAsRoot()
+ }
+
+ os.Exit(m.Run())
+}
diff --git a/test/syscalls/syscall_test_runner.sh b/test/syscalls/syscall_test_runner.sh
new file mode 100755
index 000000000..de479f68c
--- /dev/null
+++ b/test/syscalls/syscall_test_runner.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+# Copyright 2018 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# syscall_test_runner.sh is a simple wrapper around the go syscall test runner.
+# It exists so that we can build the syscall test runner once, and use it for
+# all syscall tests, rather than build it for each test run.
+
+set -euf -o pipefail
+
+# The syscall test runner binary and arguments have all been passed as arguments
+# to this shell script.
+exec "$@"