summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--BUILD14
-rw-r--r--CONTRIBUTING.md6
-rw-r--r--Dockerfile2
-rw-r--r--README.md29
-rw-r--r--WORKSPACE18
-rw-r--r--g3doc/README.md2
-rw-r--r--go.mod2
-rw-r--r--go.sum19
-rw-r--r--pkg/abi/BUILD2
-rw-r--r--pkg/abi/linux/BUILD8
-rw-r--r--pkg/abi/linux/capability.go7
-rw-r--r--pkg/abi/linux/clone.go41
-rw-r--r--pkg/abi/linux/epoll.go56
-rw-r--r--pkg/abi/linux/fcntl.go38
-rw-r--r--pkg/abi/linux/file.go98
-rw-r--r--pkg/abi/linux/ioctl.go25
-rw-r--r--pkg/abi/linux/ipc.go6
-rw-r--r--pkg/abi/linux/linux.go2
-rw-r--r--pkg/abi/linux/netdevice.go2
-rw-r--r--pkg/abi/linux/netlink.go8
-rw-r--r--pkg/abi/linux/netlink_route.go12
-rw-r--r--pkg/abi/linux/sched.go6
-rw-r--r--pkg/abi/linux/signal.go2
-rw-r--r--pkg/abi/linux/socket.go2
-rw-r--r--pkg/abi/linux/time.go21
-rw-r--r--pkg/amutex/BUILD3
-rw-r--r--pkg/amutex/amutex.go4
-rw-r--r--pkg/amutex/amutex_test.go2
-rw-r--r--pkg/atomicbitops/BUILD2
-rw-r--r--pkg/binary/BUILD2
-rw-r--r--pkg/bits/BUILD2
-rw-r--r--pkg/bpf/BUILD2
-rw-r--r--pkg/bpf/bpf.go2
-rw-r--r--pkg/bpf/decoder.go2
-rw-r--r--pkg/bpf/decoder_test.go2
-rw-r--r--pkg/bpf/interpreter.go2
-rw-r--r--pkg/bpf/interpreter_test.go4
-rw-r--r--pkg/bpf/program_builder.go2
-rw-r--r--pkg/bpf/program_builder_test.go2
-rw-r--r--pkg/compressio/BUILD2
-rw-r--r--pkg/compressio/compressio.go2
-rw-r--r--pkg/control/client/BUILD2
-rw-r--r--pkg/control/client/client.go4
-rw-r--r--pkg/control/server/BUILD2
-rw-r--r--pkg/control/server/server.go6
-rw-r--r--pkg/cpuid/BUILD2
-rw-r--r--pkg/cpuid/cpuid.go4
-rw-r--r--pkg/eventchannel/BUILD4
-rw-r--r--pkg/eventchannel/event.go8
-rw-r--r--pkg/fd/BUILD2
-rw-r--r--pkg/fd/fd.go4
-rw-r--r--pkg/fdchannel/BUILD17
-rw-r--r--pkg/fdchannel/fdchannel_test.go131
-rw-r--r--pkg/fdchannel/fdchannel_unsafe.go146
-rw-r--r--pkg/fdnotifier/BUILD2
-rw-r--r--pkg/fdnotifier/fdnotifier.go2
-rw-r--r--pkg/fdnotifier/poll_unsafe.go2
-rw-r--r--pkg/flipcall/BUILD31
-rw-r--r--pkg/flipcall/endpoint_futex.go45
-rw-r--r--pkg/flipcall/endpoint_unsafe.go238
-rw-r--r--pkg/flipcall/flipcall.go32
-rw-r--r--pkg/flipcall/flipcall_example_test.go106
-rw-r--r--pkg/flipcall/flipcall_test.go211
-rw-r--r--pkg/flipcall/futex_linux.go94
-rw-r--r--pkg/flipcall/packet_window_allocator.go166
-rw-r--r--pkg/gate/BUILD2
-rw-r--r--pkg/gate/gate_test.go2
-rw-r--r--pkg/ilist/BUILD2
-rw-r--r--pkg/linewriter/BUILD2
-rw-r--r--pkg/log/BUILD2
-rw-r--r--pkg/log/log.go17
-rw-r--r--pkg/memutil/BUILD2
-rw-r--r--pkg/metric/BUILD4
-rw-r--r--pkg/metric/metric.go6
-rw-r--r--pkg/metric/metric_test.go4
-rw-r--r--pkg/p9/BUILD2
-rw-r--r--pkg/p9/client.go4
-rw-r--r--pkg/p9/client_file.go4
-rw-r--r--pkg/p9/client_test.go2
-rw-r--r--pkg/p9/file.go2
-rw-r--r--pkg/p9/handlers.go12
-rw-r--r--pkg/p9/local_server/BUILD14
-rw-r--r--pkg/p9/local_server/local_server.go353
-rw-r--r--pkg/p9/messages.go2
-rw-r--r--pkg/p9/p9.go2
-rw-r--r--pkg/p9/p9test/BUILD4
-rw-r--r--pkg/p9/p9test/client_test.go66
-rw-r--r--pkg/p9/p9test/p9test.go4
-rw-r--r--pkg/p9/path_tree.go208
-rw-r--r--pkg/p9/server.go63
-rw-r--r--pkg/p9/transport.go6
-rw-r--r--pkg/p9/transport_test.go49
-rw-r--r--pkg/procid/BUILD2
-rw-r--r--pkg/rand/BUILD2
-rw-r--r--pkg/refs/BUILD5
-rw-r--r--pkg/refs/refcounter.go164
-rw-r--r--pkg/seccomp/BUILD2
-rw-r--r--pkg/seccomp/seccomp.go8
-rw-r--r--pkg/seccomp/seccomp_amd64.go2
-rw-r--r--pkg/seccomp/seccomp_arm64.go2
-rw-r--r--pkg/seccomp/seccomp_test.go6
-rw-r--r--pkg/seccomp/seccomp_test_victim.go2
-rw-r--r--pkg/seccomp/seccomp_unsafe.go2
-rw-r--r--pkg/secio/BUILD2
-rw-r--r--pkg/segment/set.go2
-rw-r--r--pkg/segment/test/BUILD2
-rw-r--r--pkg/sentry/arch/BUILD4
-rw-r--r--pkg/sentry/arch/arch.go10
-rw-r--r--pkg/sentry/arch/arch_amd64.go8
-rw-r--r--pkg/sentry/arch/arch_state_x86.go4
-rw-r--r--pkg/sentry/arch/arch_x86.go12
-rw-r--r--pkg/sentry/arch/auxv.go2
-rw-r--r--pkg/sentry/arch/signal_amd64.go6
-rw-r--r--pkg/sentry/arch/signal_stack.go2
-rw-r--r--pkg/sentry/arch/stack.go4
-rw-r--r--pkg/sentry/context/BUILD2
-rw-r--r--pkg/sentry/context/context.go4
-rw-r--r--pkg/sentry/context/contexttest/BUILD2
-rw-r--r--pkg/sentry/context/contexttest/contexttest.go22
-rw-r--r--pkg/sentry/control/BUILD6
-rw-r--r--pkg/sentry/control/logging.go136
-rw-r--r--pkg/sentry/control/pprof.go4
-rw-r--r--pkg/sentry/control/proc.go30
-rw-r--r--pkg/sentry/control/proc_test.go6
-rw-r--r--pkg/sentry/control/state.go10
-rw-r--r--pkg/sentry/device/BUILD2
-rw-r--r--pkg/sentry/device/device.go2
-rw-r--r--pkg/sentry/fs/BUILD3
-rw-r--r--pkg/sentry/fs/README.md6
-rw-r--r--pkg/sentry/fs/anon/BUILD2
-rw-r--r--pkg/sentry/fs/anon/anon.go12
-rw-r--r--pkg/sentry/fs/anon/device.go2
-rw-r--r--pkg/sentry/fs/ashmem/BUILD65
-rw-r--r--pkg/sentry/fs/ashmem/area.go308
-rw-r--r--pkg/sentry/fs/ashmem/device.go61
-rw-r--r--pkg/sentry/fs/ashmem/pin_board.go127
-rw-r--r--pkg/sentry/fs/ashmem/pin_board_test.go130
-rw-r--r--pkg/sentry/fs/attr.go32
-rw-r--r--pkg/sentry/fs/binder/BUILD27
-rw-r--r--pkg/sentry/fs/binder/binder.go260
-rw-r--r--pkg/sentry/fs/context.go6
-rw-r--r--pkg/sentry/fs/copy_up.go10
-rw-r--r--pkg/sentry/fs/copy_up_test.go10
-rw-r--r--pkg/sentry/fs/dentry.go2
-rw-r--r--pkg/sentry/fs/dev/BUILD4
-rw-r--r--pkg/sentry/fs/dev/dev.go48
-rw-r--r--pkg/sentry/fs/dev/device.go2
-rw-r--r--pkg/sentry/fs/dev/fs.go43
-rw-r--r--pkg/sentry/fs/dev/full.go14
-rw-r--r--pkg/sentry/fs/dev/null.go17
-rw-r--r--pkg/sentry/fs/dev/random.go16
-rw-r--r--pkg/sentry/fs/dirent.go39
-rw-r--r--pkg/sentry/fs/dirent_cache.go4
-rw-r--r--pkg/sentry/fs/dirent_refs_test.go33
-rw-r--r--pkg/sentry/fs/dirent_state.go2
-rw-r--r--pkg/sentry/fs/ext/BUILD11
-rw-r--r--pkg/sentry/fs/ext/assets/README.md28
-rw-r--r--pkg/sentry/fs/ext/assets/bigfile.txt41
-rw-r--r--pkg/sentry/fs/ext/assets/file.txt1
l---------pkg/sentry/fs/ext/assets/symlink.txt1
-rw-r--r--pkg/sentry/fs/ext/assets/tiny.ext4bin0 -> 65536 bytes
-rw-r--r--pkg/sentry/fs/ext/disklayout/BUILD46
-rw-r--r--pkg/sentry/fs/ext/disklayout/block_group.go127
-rw-r--r--pkg/sentry/fs/ext/disklayout/block_group_32.go72
-rw-r--r--pkg/sentry/fs/ext/disklayout/block_group_64.go93
-rw-r--r--pkg/sentry/fs/ext/disklayout/block_group_test.go (renamed from pkg/abi/linux/ashmem.go)21
-rw-r--r--pkg/sentry/fs/ext/disklayout/dirent.go69
-rw-r--r--pkg/sentry/fs/ext/disklayout/dirent_new.go61
-rw-r--r--pkg/sentry/fs/ext/disklayout/dirent_old.go50
-rw-r--r--pkg/sentry/fs/ext/disklayout/dirent_test.go (renamed from pkg/abi/linux/binder.go)18
-rw-r--r--pkg/sentry/fs/ext/disklayout/disklayout.go50
-rw-r--r--pkg/sentry/fs/ext/disklayout/inode.go267
-rw-r--r--pkg/sentry/fs/ext/disklayout/inode_new.go96
-rw-r--r--pkg/sentry/fs/ext/disklayout/inode_old.go117
-rw-r--r--pkg/sentry/fs/ext/disklayout/inode_test.go222
-rw-r--r--pkg/sentry/fs/ext/disklayout/superblock.go468
-rw-r--r--pkg/sentry/fs/ext/disklayout/superblock_32.go75
-rw-r--r--pkg/sentry/fs/ext/disklayout/superblock_64.go94
-rw-r--r--pkg/sentry/fs/ext/disklayout/superblock_old.go102
-rw-r--r--pkg/sentry/fs/ext/disklayout/superblock_test.go27
-rw-r--r--pkg/sentry/fs/ext/disklayout/test_utils.go30
-rw-r--r--pkg/sentry/fs/ext/ext.go49
-rw-r--r--pkg/sentry/fs/ext4/BUILD14
-rw-r--r--pkg/sentry/fs/ext4/fs.go61
-rw-r--r--pkg/sentry/fs/fdpipe/BUILD4
-rw-r--r--pkg/sentry/fs/fdpipe/pipe.go22
-rw-r--r--pkg/sentry/fs/fdpipe/pipe_opener.go8
-rw-r--r--pkg/sentry/fs/fdpipe/pipe_opener_test.go14
-rw-r--r--pkg/sentry/fs/fdpipe/pipe_state.go4
-rw-r--r--pkg/sentry/fs/fdpipe/pipe_test.go70
-rw-r--r--pkg/sentry/fs/file.go45
-rw-r--r--pkg/sentry/fs/file_operations.go23
-rw-r--r--pkg/sentry/fs/file_overlay.go150
-rw-r--r--pkg/sentry/fs/file_overlay_test.go22
-rw-r--r--pkg/sentry/fs/filesystems.go2
-rw-r--r--pkg/sentry/fs/filetest/BUILD2
-rw-r--r--pkg/sentry/fs/filetest/filetest.go16
-rw-r--r--pkg/sentry/fs/flags.go5
-rw-r--r--pkg/sentry/fs/fs.go4
-rw-r--r--pkg/sentry/fs/fsutil/BUILD12
-rw-r--r--pkg/sentry/fs/fsutil/dirty_set.go10
-rw-r--r--pkg/sentry/fs/fsutil/dirty_set_test.go4
-rw-r--r--pkg/sentry/fs/fsutil/file.go18
-rw-r--r--pkg/sentry/fs/fsutil/file_range_set.go14
-rw-r--r--pkg/sentry/fs/fsutil/frame_ref_set.go2
-rw-r--r--pkg/sentry/fs/fsutil/host_file_mapper.go10
-rw-r--r--pkg/sentry/fs/fsutil/host_file_mapper_unsafe.go2
-rw-r--r--pkg/sentry/fs/fsutil/host_mappable.go12
-rw-r--r--pkg/sentry/fs/fsutil/inode.go14
-rw-r--r--pkg/sentry/fs/fsutil/inode_cached.go22
-rw-r--r--pkg/sentry/fs/fsutil/inode_cached_test.go22
-rw-r--r--pkg/sentry/fs/g3doc/inotify.md18
-rw-r--r--pkg/sentry/fs/gofer/BUILD2
-rw-r--r--pkg/sentry/fs/gofer/attr.go12
-rw-r--r--pkg/sentry/fs/gofer/cache_policy.go4
-rw-r--r--pkg/sentry/fs/gofer/context_file.go6
-rw-r--r--pkg/sentry/fs/gofer/device.go2
-rw-r--r--pkg/sentry/fs/gofer/file.go24
-rw-r--r--pkg/sentry/fs/gofer/file_state.go4
-rw-r--r--pkg/sentry/fs/gofer/fs.go6
-rw-r--r--pkg/sentry/fs/gofer/gofer_test.go16
-rw-r--r--pkg/sentry/fs/gofer/handles.go21
-rw-r--r--pkg/sentry/fs/gofer/inode.go26
-rw-r--r--pkg/sentry/fs/gofer/inode_state.go10
-rw-r--r--pkg/sentry/fs/gofer/path.go27
-rw-r--r--pkg/sentry/fs/gofer/session.go36
-rw-r--r--pkg/sentry/fs/gofer/session_state.go9
-rw-r--r--pkg/sentry/fs/gofer/socket.go25
-rw-r--r--pkg/sentry/fs/gofer/util.go6
-rw-r--r--pkg/sentry/fs/host/BUILD2
-rw-r--r--pkg/sentry/fs/host/control.go8
-rw-r--r--pkg/sentry/fs/host/descriptor.go6
-rw-r--r--pkg/sentry/fs/host/descriptor_test.go4
-rw-r--r--pkg/sentry/fs/host/device.go2
-rw-r--r--pkg/sentry/fs/host/file.go28
-rw-r--r--pkg/sentry/fs/host/fs.go8
-rw-r--r--pkg/sentry/fs/host/fs_test.go6
-rw-r--r--pkg/sentry/fs/host/inode.go30
-rw-r--r--pkg/sentry/fs/host/inode_state.go6
-rw-r--r--pkg/sentry/fs/host/inode_test.go10
-rw-r--r--pkg/sentry/fs/host/ioctl_unsafe.go2
-rw-r--r--pkg/sentry/fs/host/socket.go50
-rw-r--r--pkg/sentry/fs/host/socket_iovec.go4
-rw-r--r--pkg/sentry/fs/host/socket_state.go2
-rw-r--r--pkg/sentry/fs/host/socket_test.go20
-rw-r--r--pkg/sentry/fs/host/tty.go18
-rw-r--r--pkg/sentry/fs/host/util.go14
-rw-r--r--pkg/sentry/fs/host/util_unsafe.go6
-rw-r--r--pkg/sentry/fs/host/wait_test.go6
-rw-r--r--pkg/sentry/fs/inode.go45
-rw-r--r--pkg/sentry/fs/inode_operations.go8
-rw-r--r--pkg/sentry/fs/inode_overlay.go64
-rw-r--r--pkg/sentry/fs/inode_overlay_test.go22
-rw-r--r--pkg/sentry/fs/inotify.go18
-rw-r--r--pkg/sentry/fs/inotify_event.go4
-rw-r--r--pkg/sentry/fs/inotify_watch.go2
-rw-r--r--pkg/sentry/fs/lock/BUILD2
-rw-r--r--pkg/sentry/fs/lock/lock.go4
-rw-r--r--pkg/sentry/fs/mock.go16
-rw-r--r--pkg/sentry/fs/mount.go68
-rw-r--r--pkg/sentry/fs/mount_overlay.go17
-rw-r--r--pkg/sentry/fs/mount_test.go2
-rw-r--r--pkg/sentry/fs/mounts.go36
-rw-r--r--pkg/sentry/fs/mounts_test.go18
-rw-r--r--pkg/sentry/fs/offset.go2
-rw-r--r--pkg/sentry/fs/overlay.go36
-rw-r--r--pkg/sentry/fs/proc/BUILD4
-rw-r--r--pkg/sentry/fs/proc/cgroup.go4
-rw-r--r--pkg/sentry/fs/proc/cpuinfo.go6
-rw-r--r--pkg/sentry/fs/proc/device/BUILD2
-rw-r--r--pkg/sentry/fs/proc/device/device.go2
-rw-r--r--pkg/sentry/fs/proc/exec_args.go18
-rw-r--r--pkg/sentry/fs/proc/fds.go62
-rw-r--r--pkg/sentry/fs/proc/filesystems.go6
-rw-r--r--pkg/sentry/fs/proc/fs.go8
-rw-r--r--pkg/sentry/fs/proc/inode.go24
-rw-r--r--pkg/sentry/fs/proc/loadavg.go4
-rw-r--r--pkg/sentry/fs/proc/meminfo.go10
-rw-r--r--pkg/sentry/fs/proc/mounts.go8
-rw-r--r--pkg/sentry/fs/proc/net.go193
-rw-r--r--pkg/sentry/fs/proc/net_test.go4
-rw-r--r--pkg/sentry/fs/proc/proc.go28
-rw-r--r--pkg/sentry/fs/proc/rpcinet_proc.go32
-rw-r--r--pkg/sentry/fs/proc/seqfile/BUILD2
-rw-r--r--pkg/sentry/fs/proc/seqfile/seqfile.go21
-rw-r--r--pkg/sentry/fs/proc/seqfile/seqfile_test.go18
-rw-r--r--pkg/sentry/fs/proc/stat.go8
-rw-r--r--pkg/sentry/fs/proc/sys.go28
-rw-r--r--pkg/sentry/fs/proc/sys_net.go28
-rw-r--r--pkg/sentry/fs/proc/sys_net_test.go6
-rw-r--r--pkg/sentry/fs/proc/task.go68
-rw-r--r--pkg/sentry/fs/proc/uid_gid_map.go20
-rw-r--r--pkg/sentry/fs/proc/uptime.go18
-rw-r--r--pkg/sentry/fs/proc/version.go6
-rw-r--r--pkg/sentry/fs/ramfs/BUILD2
-rw-r--r--pkg/sentry/fs/ramfs/dir.go21
-rw-r--r--pkg/sentry/fs/ramfs/socket.go12
-rw-r--r--pkg/sentry/fs/ramfs/symlink.go10
-rw-r--r--pkg/sentry/fs/ramfs/tree.go10
-rw-r--r--pkg/sentry/fs/ramfs/tree_test.go6
-rw-r--r--pkg/sentry/fs/save.go2
-rw-r--r--pkg/sentry/fs/splice.go8
-rw-r--r--pkg/sentry/fs/sys/BUILD2
-rw-r--r--pkg/sentry/fs/sys/device.go2
-rw-r--r--pkg/sentry/fs/sys/devices.go12
-rw-r--r--pkg/sentry/fs/sys/fs.go8
-rw-r--r--pkg/sentry/fs/sys/sys.go14
-rw-r--r--pkg/sentry/fs/timerfd/BUILD2
-rw-r--r--pkg/sentry/fs/timerfd/timerfd.go18
-rw-r--r--pkg/sentry/fs/tmpfs/BUILD2
-rw-r--r--pkg/sentry/fs/tmpfs/device.go2
-rw-r--r--pkg/sentry/fs/tmpfs/file_regular.go12
-rw-r--r--pkg/sentry/fs/tmpfs/file_test.go16
-rw-r--r--pkg/sentry/fs/tmpfs/fs.go38
-rw-r--r--pkg/sentry/fs/tmpfs/inode_file.go26
-rw-r--r--pkg/sentry/fs/tmpfs/tmpfs.go32
-rw-r--r--pkg/sentry/fs/tty/BUILD2
-rw-r--r--pkg/sentry/fs/tty/dir.go26
-rw-r--r--pkg/sentry/fs/tty/fs.go17
-rw-r--r--pkg/sentry/fs/tty/line_discipline.go12
-rw-r--r--pkg/sentry/fs/tty/master.go22
-rw-r--r--pkg/sentry/fs/tty/queue.go14
-rw-r--r--pkg/sentry/fs/tty/slave.go20
-rw-r--r--pkg/sentry/fs/tty/terminal.go10
-rw-r--r--pkg/sentry/fs/tty/tty_test.go6
-rw-r--r--pkg/sentry/hostcpu/BUILD2
-rw-r--r--pkg/sentry/hostmm/BUILD2
-rw-r--r--pkg/sentry/hostmm/hostmm.go6
-rw-r--r--pkg/sentry/inet/BUILD2
-rw-r--r--pkg/sentry/inet/context.go2
-rw-r--r--pkg/sentry/kernel/BUILD23
-rw-r--r--pkg/sentry/kernel/abstract_socket_namespace.go4
-rw-r--r--pkg/sentry/kernel/auth/BUILD14
-rw-r--r--pkg/sentry/kernel/auth/capability_set.go6
-rw-r--r--pkg/sentry/kernel/auth/context.go2
-rw-r--r--pkg/sentry/kernel/auth/credentials.go4
-rw-r--r--pkg/sentry/kernel/auth/id_map.go6
-rw-r--r--pkg/sentry/kernel/auth/user_namespace.go2
-rw-r--r--pkg/sentry/kernel/context.go4
-rw-r--r--pkg/sentry/kernel/contexttest/BUILD2
-rw-r--r--pkg/sentry/kernel/contexttest/contexttest.go10
-rw-r--r--pkg/sentry/kernel/epoll/BUILD3
-rw-r--r--pkg/sentry/kernel/epoll/epoll.go19
-rw-r--r--pkg/sentry/kernel/epoll/epoll_state.go4
-rw-r--r--pkg/sentry/kernel/epoll/epoll_test.go6
-rw-r--r--pkg/sentry/kernel/eventfd/BUILD2
-rw-r--r--pkg/sentry/kernel/eventfd/eventfd.go20
-rw-r--r--pkg/sentry/kernel/eventfd/eventfd_test.go6
-rw-r--r--pkg/sentry/kernel/fasync/BUILD2
-rw-r--r--pkg/sentry/kernel/fasync/fasync.go55
-rw-r--r--pkg/sentry/kernel/fd_map.go364
-rw-r--r--pkg/sentry/kernel/fd_map_test.go136
-rw-r--r--pkg/sentry/kernel/fd_table.go380
-rw-r--r--pkg/sentry/kernel/fd_table_test.go192
-rw-r--r--pkg/sentry/kernel/fd_table_unsafe.go103
-rw-r--r--pkg/sentry/kernel/fs_context.go12
-rw-r--r--pkg/sentry/kernel/futex/BUILD6
-rw-r--r--pkg/sentry/kernel/futex/futex.go33
-rw-r--r--pkg/sentry/kernel/futex/futex_test.go2
-rw-r--r--pkg/sentry/kernel/ipc_namespace.go8
-rw-r--r--pkg/sentry/kernel/kdefs/BUILD10
-rw-r--r--pkg/sentry/kernel/kernel.go183
-rw-r--r--pkg/sentry/kernel/kernel_state.go4
-rw-r--r--pkg/sentry/kernel/memevent/BUILD4
-rw-r--r--pkg/sentry/kernel/memevent/memory_events.go12
-rw-r--r--pkg/sentry/kernel/pending_signals.go6
-rw-r--r--pkg/sentry/kernel/pending_signals_state.go2
-rw-r--r--pkg/sentry/kernel/pipe/BUILD2
-rw-r--r--pkg/sentry/kernel/pipe/buffer.go2
-rw-r--r--pkg/sentry/kernel/pipe/buffer_test.go2
-rw-r--r--pkg/sentry/kernel/pipe/device.go2
-rw-r--r--pkg/sentry/kernel/pipe/node.go12
-rw-r--r--pkg/sentry/kernel/pipe/node_test.go14
-rw-r--r--pkg/sentry/kernel/pipe/pipe.go38
-rw-r--r--pkg/sentry/kernel/pipe/pipe_test.go8
-rw-r--r--pkg/sentry/kernel/pipe/reader.go2
-rw-r--r--pkg/sentry/kernel/pipe/reader_writer.go16
-rw-r--r--pkg/sentry/kernel/pipe/writer.go2
-rw-r--r--pkg/sentry/kernel/posixtimer.go8
-rw-r--r--pkg/sentry/kernel/ptrace.go10
-rw-r--r--pkg/sentry/kernel/ptrace_amd64.go6
-rw-r--r--pkg/sentry/kernel/ptrace_arm64.go6
-rw-r--r--pkg/sentry/kernel/rseq.go6
-rw-r--r--pkg/sentry/kernel/sched/BUILD2
-rw-r--r--pkg/sentry/kernel/seccomp.go12
-rw-r--r--pkg/sentry/kernel/semaphore/BUILD2
-rw-r--r--pkg/sentry/kernel/semaphore/semaphore.go16
-rw-r--r--pkg/sentry/kernel/semaphore/semaphore_test.go10
-rw-r--r--pkg/sentry/kernel/sessions.go24
-rw-r--r--pkg/sentry/kernel/shm/BUILD2
-rw-r--r--pkg/sentry/kernel/shm/device.go2
-rw-r--r--pkg/sentry/kernel/shm/shm.go27
-rw-r--r--pkg/sentry/kernel/signal.go8
-rw-r--r--pkg/sentry/kernel/signal_handlers.go4
-rw-r--r--pkg/sentry/kernel/syscalls.go8
-rw-r--r--pkg/sentry/kernel/syslog.go1
-rw-r--r--pkg/sentry/kernel/table_test.go4
-rw-r--r--pkg/sentry/kernel/task.go118
-rw-r--r--pkg/sentry/kernel/task_acct.go10
-rw-r--r--pkg/sentry/kernel/task_block.go4
-rw-r--r--pkg/sentry/kernel/task_clone.go60
-rw-r--r--pkg/sentry/kernel/task_context.go20
-rw-r--r--pkg/sentry/kernel/task_exec.go12
-rw-r--r--pkg/sentry/kernel/task_exit.go14
-rw-r--r--pkg/sentry/kernel/task_futex.go8
-rw-r--r--pkg/sentry/kernel/task_identity.go228
-rw-r--r--pkg/sentry/kernel/task_log.go6
-rw-r--r--pkg/sentry/kernel/task_net.go2
-rw-r--r--pkg/sentry/kernel/task_run.go14
-rw-r--r--pkg/sentry/kernel/task_sched.go14
-rw-r--r--pkg/sentry/kernel/task_signals.go14
-rw-r--r--pkg/sentry/kernel/task_start.go29
-rw-r--r--pkg/sentry/kernel/task_stop.go2
-rw-r--r--pkg/sentry/kernel/task_syscall.go14
-rw-r--r--pkg/sentry/kernel/task_test.go2
-rw-r--r--pkg/sentry/kernel/task_usermem.go6
-rw-r--r--pkg/sentry/kernel/thread_group.go23
-rw-r--r--pkg/sentry/kernel/threads.go4
-rw-r--r--pkg/sentry/kernel/time/BUILD2
-rw-r--r--pkg/sentry/kernel/time/context.go2
-rw-r--r--pkg/sentry/kernel/time/time.go11
-rw-r--r--pkg/sentry/kernel/timekeeper.go12
-rw-r--r--pkg/sentry/kernel/timekeeper_state.go2
-rw-r--r--pkg/sentry/kernel/timekeeper_test.go12
-rw-r--r--pkg/sentry/kernel/uts_namespace.go2
-rw-r--r--pkg/sentry/kernel/vdso.go10
-rw-r--r--pkg/sentry/limits/BUILD2
-rw-r--r--pkg/sentry/limits/context.go2
-rw-r--r--pkg/sentry/limits/linux.go2
-rw-r--r--pkg/sentry/loader/BUILD2
-rw-r--r--pkg/sentry/loader/elf.go26
-rw-r--r--pkg/sentry/loader/interpreter.go8
-rw-r--r--pkg/sentry/loader/loader.go26
-rw-r--r--pkg/sentry/loader/vdso.go41
-rw-r--r--pkg/sentry/memmap/BUILD2
-rw-r--r--pkg/sentry/memmap/mapping_set.go4
-rw-r--r--pkg/sentry/memmap/mapping_set_test.go2
-rw-r--r--pkg/sentry/memmap/memmap.go8
-rw-r--r--pkg/sentry/mm/BUILD8
-rw-r--r--pkg/sentry/mm/README.md8
-rw-r--r--pkg/sentry/mm/address_space.go6
-rw-r--r--pkg/sentry/mm/aio_context.go22
-rw-r--r--pkg/sentry/mm/debug.go2
-rw-r--r--pkg/sentry/mm/io.go10
-rw-r--r--pkg/sentry/mm/lifecycle.go53
-rw-r--r--pkg/sentry/mm/metadata.go6
-rw-r--r--pkg/sentry/mm/mm.go21
-rw-r--r--pkg/sentry/mm/mm_test.go18
-rw-r--r--pkg/sentry/mm/pma.go16
-rw-r--r--pkg/sentry/mm/procfs.go8
-rw-r--r--pkg/sentry/mm/save_restore.go2
-rw-r--r--pkg/sentry/mm/shm.go8
-rw-r--r--pkg/sentry/mm/special_mappable.go20
-rw-r--r--pkg/sentry/mm/syscalls.go44
-rw-r--r--pkg/sentry/mm/vma.go19
-rw-r--r--pkg/sentry/pgalloc/BUILD4
-rw-r--r--pkg/sentry/pgalloc/context.go2
-rw-r--r--pkg/sentry/pgalloc/pgalloc.go16
-rw-r--r--pkg/sentry/pgalloc/pgalloc_test.go2
-rw-r--r--pkg/sentry/pgalloc/save_restore.go8
-rw-r--r--pkg/sentry/platform/BUILD3
-rw-r--r--pkg/sentry/platform/context.go2
-rw-r--r--pkg/sentry/platform/interrupt/BUILD2
-rw-r--r--pkg/sentry/platform/kvm/BUILD4
-rw-r--r--pkg/sentry/platform/kvm/address_space.go8
-rw-r--r--pkg/sentry/platform/kvm/allocator.go2
-rw-r--r--pkg/sentry/platform/kvm/bluepill.go4
-rw-r--r--pkg/sentry/platform/kvm/bluepill_amd64.go4
-rw-r--r--pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go6
-rw-r--r--pkg/sentry/platform/kvm/bluepill_fault.go2
-rw-r--r--pkg/sentry/platform/kvm/context.go10
-rw-r--r--pkg/sentry/platform/kvm/filters.go33
-rw-r--r--pkg/sentry/platform/kvm/kvm.go24
-rw-r--r--pkg/sentry/platform/kvm/kvm_amd64.go2
-rw-r--r--pkg/sentry/platform/kvm/kvm_test.go12
-rw-r--r--pkg/sentry/platform/kvm/machine.go21
-rw-r--r--pkg/sentry/platform/kvm/machine_amd64.go10
-rw-r--r--pkg/sentry/platform/kvm/machine_amd64_unsafe.go6
-rw-r--r--pkg/sentry/platform/kvm/machine_unsafe.go2
-rw-r--r--pkg/sentry/platform/kvm/physical_map.go6
-rw-r--r--pkg/sentry/platform/kvm/testutil/BUILD2
-rw-r--r--pkg/sentry/platform/kvm/virtual_map.go2
-rw-r--r--pkg/sentry/platform/kvm/virtual_map_test.go2
-rw-r--r--pkg/sentry/platform/mmap_min_addr.go2
-rw-r--r--pkg/sentry/platform/platform.go38
-rw-r--r--pkg/sentry/platform/ptrace/BUILD3
-rw-r--r--pkg/sentry/platform/ptrace/filters.go33
-rw-r--r--pkg/sentry/platform/ptrace/ptrace.go25
-rw-r--r--pkg/sentry/platform/ptrace/ptrace_unsafe.go4
-rw-r--r--pkg/sentry/platform/ptrace/stub_unsafe.go4
-rw-r--r--pkg/sentry/platform/ptrace/subprocess.go50
-rw-r--r--pkg/sentry/platform/ptrace/subprocess_amd64.go39
-rw-r--r--pkg/sentry/platform/ptrace/subprocess_linux.go15
-rw-r--r--pkg/sentry/platform/ptrace/subprocess_linux_amd64_unsafe.go2
-rw-r--r--pkg/sentry/platform/ring0/BUILD2
-rw-r--r--pkg/sentry/platform/ring0/defs.go2
-rw-r--r--pkg/sentry/platform/ring0/defs_amd64.go2
-rw-r--r--pkg/sentry/platform/ring0/entry_amd64.s2
-rw-r--r--pkg/sentry/platform/ring0/kernel_amd64.go8
-rw-r--r--pkg/sentry/platform/ring0/lib_amd64.go2
-rw-r--r--pkg/sentry/platform/ring0/pagetables/BUILD2
-rw-r--r--pkg/sentry/platform/ring0/pagetables/allocator_unsafe.go2
-rw-r--r--pkg/sentry/platform/ring0/pagetables/pagetables.go2
-rw-r--r--pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go2
-rw-r--r--pkg/sentry/platform/ring0/pagetables/pagetables_test.go2
-rw-r--r--pkg/sentry/platform/ring0/pagetables/pagetables_x86.go2
-rw-r--r--pkg/sentry/platform/ring0/x86.go2
-rw-r--r--pkg/sentry/platform/safecopy/BUILD2
-rw-r--r--pkg/sentry/platform/safecopy/safecopy.go2
-rw-r--r--pkg/sentry/safemem/BUILD2
-rw-r--r--pkg/sentry/safemem/block_unsafe.go2
-rw-r--r--pkg/sentry/sighandling/BUILD2
-rw-r--r--pkg/sentry/sighandling/sighandling.go2
-rw-r--r--pkg/sentry/sighandling/sighandling_unsafe.go2
-rw-r--r--pkg/sentry/socket/BUILD3
-rw-r--r--pkg/sentry/socket/control/BUILD5
-rw-r--r--pkg/sentry/socket/control/control.go28
-rw-r--r--pkg/sentry/socket/epsocket/BUILD3
-rw-r--r--pkg/sentry/socket/epsocket/device.go2
-rw-r--r--pkg/sentry/socket/epsocket/epsocket.go128
-rw-r--r--pkg/sentry/socket/epsocket/provider.go30
-rw-r--r--pkg/sentry/socket/epsocket/save_restore.go2
-rw-r--r--pkg/sentry/socket/epsocket/stack.go16
-rw-r--r--pkg/sentry/socket/hostinet/BUILD3
-rw-r--r--pkg/sentry/socket/hostinet/device.go2
-rw-r--r--pkg/sentry/socket/hostinet/socket.go43
-rw-r--r--pkg/sentry/socket/hostinet/socket_unsafe.go15
-rw-r--r--pkg/sentry/socket/hostinet/stack.go12
-rw-r--r--pkg/sentry/socket/netlink/BUILD3
-rw-r--r--pkg/sentry/socket/netlink/message.go6
-rw-r--r--pkg/sentry/socket/netlink/port/BUILD2
-rw-r--r--pkg/sentry/socket/netlink/provider.go14
-rw-r--r--pkg/sentry/socket/netlink/route/BUILD2
-rw-r--r--pkg/sentry/socket/netlink/route/protocol.go14
-rw-r--r--pkg/sentry/socket/netlink/socket.go45
-rw-r--r--pkg/sentry/socket/rpcinet/BUILD5
-rw-r--r--pkg/sentry/socket/rpcinet/conn/BUILD2
-rw-r--r--pkg/sentry/socket/rpcinet/conn/conn.go8
-rw-r--r--pkg/sentry/socket/rpcinet/device.go2
-rw-r--r--pkg/sentry/socket/rpcinet/notifier/BUILD2
-rw-r--r--pkg/sentry/socket/rpcinet/notifier/notifier.go6
-rw-r--r--pkg/sentry/socket/rpcinet/socket.go56
-rw-r--r--pkg/sentry/socket/rpcinet/stack.go12
-rw-r--r--pkg/sentry/socket/rpcinet/stack_unsafe.go10
-rw-r--r--pkg/sentry/socket/socket.go29
-rw-r--r--pkg/sentry/socket/unix/BUILD3
-rw-r--r--pkg/sentry/socket/unix/device.go2
-rw-r--r--pkg/sentry/socket/unix/io.go15
-rw-r--r--pkg/sentry/socket/unix/transport/BUILD3
-rw-r--r--pkg/sentry/socket/unix/transport/connectioned.go36
-rw-r--r--pkg/sentry/socket/unix/transport/connectionless.go62
-rw-r--r--pkg/sentry/socket/unix/transport/queue.go8
-rw-r--r--pkg/sentry/socket/unix/transport/unix.go25
-rw-r--r--pkg/sentry/socket/unix/unix.go69
-rw-r--r--pkg/sentry/state/BUILD2
-rw-r--r--pkg/sentry/state/state.go12
-rw-r--r--pkg/sentry/state/state_metadata.go2
-rw-r--r--pkg/sentry/state/state_unsafe.go2
-rw-r--r--pkg/sentry/strace/BUILD5
-rw-r--r--pkg/sentry/strace/capability.go6
-rw-r--r--pkg/sentry/strace/clone.go2
-rw-r--r--pkg/sentry/strace/futex.go4
-rw-r--r--pkg/sentry/strace/open.go2
-rw-r--r--pkg/sentry/strace/poll.go13
-rw-r--r--pkg/sentry/strace/ptrace.go4
-rw-r--r--pkg/sentry/strace/signal.go8
-rw-r--r--pkg/sentry/strace/socket.go20
-rw-r--r--pkg/sentry/strace/strace.go27
-rw-r--r--pkg/sentry/strace/syscalls.go6
-rw-r--r--pkg/sentry/syscalls/BUILD3
-rw-r--r--pkg/sentry/syscalls/epoll.go64
-rw-r--r--pkg/sentry/syscalls/linux/BUILD4
-rw-r--r--pkg/sentry/syscalls/linux/error.go19
-rw-r--r--pkg/sentry/syscalls/linux/flags.go4
-rw-r--r--pkg/sentry/syscalls/linux/linux64.go619
-rw-r--r--pkg/sentry/syscalls/linux/sigset.go12
-rw-r--r--pkg/sentry/syscalls/linux/sys_aio.go25
-rw-r--r--pkg/sentry/syscalls/linux/sys_capability.go10
-rw-r--r--pkg/sentry/syscalls/linux/sys_epoll.go47
-rw-r--r--pkg/sentry/syscalls/linux/sys_eventfd.go18
-rw-r--r--pkg/sentry/syscalls/linux/sys_file.go399
-rw-r--r--pkg/sentry/syscalls/linux/sys_futex.go12
-rw-r--r--pkg/sentry/syscalls/linux/sys_getdents.go43
-rw-r--r--pkg/sentry/syscalls/linux/sys_identity.go8
-rw-r--r--pkg/sentry/syscalls/linux/sys_inotify.go42
-rw-r--r--pkg/sentry/syscalls/linux/sys_lseek.go13
-rw-r--r--pkg/sentry/syscalls/linux/sys_mempolicy.go10
-rw-r--r--pkg/sentry/syscalls/linux/sys_mmap.go25
-rw-r--r--pkg/sentry/syscalls/linux/sys_mount.go16
-rw-r--r--pkg/sentry/syscalls/linux/sys_pipe.go40
-rw-r--r--pkg/sentry/syscalls/linux/sys_poll.go23
-rw-r--r--pkg/sentry/syscalls/linux/sys_prctl.go49
-rw-r--r--pkg/sentry/syscalls/linux/sys_random.go12
-rw-r--r--pkg/sentry/syscalls/linux/sys_read.go39
-rw-r--r--pkg/sentry/syscalls/linux/sys_rlimit.go12
-rw-r--r--pkg/sentry/syscalls/linux/sys_rusage.go12
-rw-r--r--pkg/sentry/syscalls/linux/sys_sched.go29
-rw-r--r--pkg/sentry/syscalls/linux/sys_seccomp.go19
-rw-r--r--pkg/sentry/syscalls/linux/sys_sem.go14
-rw-r--r--pkg/sentry/syscalls/linux/sys_shm.go10
-rw-r--r--pkg/sentry/syscalls/linux/sys_signal.go8
-rw-r--r--pkg/sentry/syscalls/linux/sys_socket.go267
-rw-r--r--pkg/sentry/syscalls/linux/sys_splice.go46
-rw-r--r--pkg/sentry/syscalls/linux/sys_stat.go153
-rw-r--r--pkg/sentry/syscalls/linux/sys_sync.go27
-rw-r--r--pkg/sentry/syscalls/linux/sys_sysinfo.go8
-rw-r--r--pkg/sentry/syscalls/linux/sys_syslog.go6
-rw-r--r--pkg/sentry/syscalls/linux/sys_thread.go106
-rw-r--r--pkg/sentry/syscalls/linux/sys_time.go12
-rw-r--r--pkg/sentry/syscalls/linux/sys_timer.go14
-rw-r--r--pkg/sentry/syscalls/linux/sys_timerfd.go27
-rw-r--r--pkg/sentry/syscalls/linux/sys_tls.go13
-rw-r--r--pkg/sentry/syscalls/linux/sys_utsname.go8
-rw-r--r--pkg/sentry/syscalls/linux/sys_write.go39
-rw-r--r--pkg/sentry/syscalls/linux/timespec.go15
-rw-r--r--pkg/sentry/syscalls/syscalls.go30
-rw-r--r--pkg/sentry/time/BUILD6
-rw-r--r--pkg/sentry/time/calibrated_clock.go6
-rw-r--r--pkg/sentry/time/parameters.go2
-rw-r--r--pkg/sentry/time/sampler.go2
-rw-r--r--pkg/sentry/unimpl/BUILD4
-rw-r--r--pkg/sentry/unimpl/events.go4
-rw-r--r--pkg/sentry/uniqueid/BUILD2
-rw-r--r--pkg/sentry/uniqueid/context.go4
-rw-r--r--pkg/sentry/usage/BUILD2
-rw-r--r--pkg/sentry/usage/memory.go4
-rw-r--r--pkg/sentry/usermem/BUILD2
-rw-r--r--pkg/sentry/usermem/bytes_io.go6
-rw-r--r--pkg/sentry/usermem/bytes_io_unsafe.go4
-rw-r--r--pkg/sentry/usermem/usermem.go8
-rw-r--r--pkg/sentry/usermem/usermem_test.go6
-rw-r--r--pkg/sentry/watchdog/BUILD2
-rw-r--r--pkg/sentry/watchdog/watchdog.go22
-rw-r--r--pkg/sleep/BUILD2
-rw-r--r--pkg/state/BUILD4
-rw-r--r--pkg/state/decode.go2
-rw-r--r--pkg/state/encode.go4
-rw-r--r--pkg/state/map.go2
-rw-r--r--pkg/state/printer.go2
-rw-r--r--pkg/state/state.go2
-rw-r--r--pkg/state/statefile/BUILD2
-rw-r--r--pkg/state/statefile/statefile.go4
-rw-r--r--pkg/state/statefile/statefile_test.go2
-rw-r--r--pkg/syserr/BUILD2
-rw-r--r--pkg/syserr/netstack.go79
-rw-r--r--pkg/syserr/syserr.go4
-rw-r--r--pkg/syserror/BUILD2
-rw-r--r--pkg/syserror/syserror.go2
-rw-r--r--pkg/syserror/syserror_test.go2
-rw-r--r--pkg/tcpip/BUILD2
-rw-r--r--pkg/tcpip/adapters/gonet/BUILD2
-rw-r--r--pkg/tcpip/adapters/gonet/gonet.go12
-rw-r--r--pkg/tcpip/adapters/gonet/gonet_test.go16
-rw-r--r--pkg/tcpip/buffer/BUILD2
-rw-r--r--pkg/tcpip/buffer/view.go2
-rw-r--r--pkg/tcpip/checker/BUILD2
-rw-r--r--pkg/tcpip/checker/checker.go6
-rw-r--r--pkg/tcpip/hash/jenkins/BUILD2
-rw-r--r--pkg/tcpip/header/BUILD2
-rw-r--r--pkg/tcpip/header/arp.go2
-rw-r--r--pkg/tcpip/header/checksum.go4
-rw-r--r--pkg/tcpip/header/eth.go2
-rw-r--r--pkg/tcpip/header/icmpv4.go2
-rw-r--r--pkg/tcpip/header/icmpv6.go2
-rw-r--r--pkg/tcpip/header/interfaces.go2
-rw-r--r--pkg/tcpip/header/ipv4.go6
-rw-r--r--pkg/tcpip/header/ipv6.go2
-rw-r--r--pkg/tcpip/header/ipv6_fragment.go2
-rw-r--r--pkg/tcpip/header/ipversion_test.go2
-rw-r--r--pkg/tcpip/header/tcp.go21
-rw-r--r--pkg/tcpip/header/tcp_test.go2
-rw-r--r--pkg/tcpip/header/udp.go2
-rw-r--r--pkg/tcpip/iptables/BUILD18
-rw-r--r--pkg/tcpip/iptables/iptables.go81
-rw-r--r--pkg/tcpip/iptables/targets.go35
-rw-r--r--pkg/tcpip/iptables/types.go183
-rw-r--r--pkg/tcpip/link/channel/BUILD2
-rw-r--r--pkg/tcpip/link/channel/channel.go6
-rw-r--r--pkg/tcpip/link/fdbased/BUILD2
-rw-r--r--pkg/tcpip/link/fdbased/endpoint.go10
-rw-r--r--pkg/tcpip/link/fdbased/endpoint_test.go10
-rw-r--r--pkg/tcpip/link/fdbased/mmap.go2
-rw-r--r--pkg/tcpip/link/fdbased/mmap_amd64.go8
-rw-r--r--pkg/tcpip/link/fdbased/packet_dispatchers.go10
-rw-r--r--pkg/tcpip/link/loopback/BUILD2
-rw-r--r--pkg/tcpip/link/loopback/loopback.go6
-rw-r--r--pkg/tcpip/link/muxed/BUILD6
-rw-r--r--pkg/tcpip/link/muxed/injectable.go6
-rw-r--r--pkg/tcpip/link/muxed/injectable_test.go10
-rw-r--r--pkg/tcpip/link/rawfile/BUILD2
-rw-r--r--pkg/tcpip/link/rawfile/errors.go4
-rw-r--r--pkg/tcpip/link/rawfile/rawfile_unsafe.go4
-rw-r--r--pkg/tcpip/link/sharedmem/BUILD2
-rw-r--r--pkg/tcpip/link/sharedmem/pipe/BUILD2
-rw-r--r--pkg/tcpip/link/sharedmem/queue/BUILD2
-rw-r--r--pkg/tcpip/link/sharedmem/queue/queue_test.go2
-rw-r--r--pkg/tcpip/link/sharedmem/queue/rx.go4
-rw-r--r--pkg/tcpip/link/sharedmem/queue/tx.go4
-rw-r--r--pkg/tcpip/link/sharedmem/rx.go4
-rw-r--r--pkg/tcpip/link/sharedmem/sharedmem.go12
-rw-r--r--pkg/tcpip/link/sharedmem/sharedmem_test.go14
-rw-r--r--pkg/tcpip/link/sharedmem/tx.go2
-rw-r--r--pkg/tcpip/link/sniffer/BUILD2
-rw-r--r--pkg/tcpip/link/sniffer/sniffer.go10
-rw-r--r--pkg/tcpip/link/tun/BUILD2
-rw-r--r--pkg/tcpip/link/waitable/BUILD2
-rw-r--r--pkg/tcpip/link/waitable/waitable.go8
-rw-r--r--pkg/tcpip/link/waitable/waitable_test.go6
-rw-r--r--pkg/tcpip/network/arp/BUILD2
-rw-r--r--pkg/tcpip/network/arp/arp.go8
-rw-r--r--pkg/tcpip/network/arp/arp_test.go18
-rw-r--r--pkg/tcpip/network/fragmentation/BUILD2
-rw-r--r--pkg/tcpip/network/fragmentation/frag_heap.go2
-rw-r--r--pkg/tcpip/network/fragmentation/frag_heap_test.go2
-rw-r--r--pkg/tcpip/network/fragmentation/fragmentation.go6
-rw-r--r--pkg/tcpip/network/fragmentation/fragmentation_test.go2
-rw-r--r--pkg/tcpip/network/fragmentation/reassembler.go2
-rw-r--r--pkg/tcpip/network/hash/BUILD2
-rw-r--r--pkg/tcpip/network/hash/hash.go4
-rw-r--r--pkg/tcpip/network/ip_test.go18
-rw-r--r--pkg/tcpip/network/ipv4/BUILD2
-rw-r--r--pkg/tcpip/network/ipv4/icmp.go6
-rw-r--r--pkg/tcpip/network/ipv4/ipv4.go12
-rw-r--r--pkg/tcpip/network/ipv4/ipv4_test.go20
-rw-r--r--pkg/tcpip/network/ipv6/BUILD2
-rw-r--r--pkg/tcpip/network/ipv6/icmp.go8
-rw-r--r--pkg/tcpip/network/ipv6/icmp_test.go16
-rw-r--r--pkg/tcpip/network/ipv6/ipv6.go8
-rw-r--r--pkg/tcpip/ports/BUILD2
-rw-r--r--pkg/tcpip/ports/ports.go2
-rw-r--r--pkg/tcpip/ports/ports_test.go2
-rw-r--r--pkg/tcpip/sample/tun_tcp_connect/main.go20
-rw-r--r--pkg/tcpip/sample/tun_tcp_echo/main.go20
-rw-r--r--pkg/tcpip/seqnum/BUILD2
-rw-r--r--pkg/tcpip/stack/BUILD2
-rw-r--r--pkg/tcpip/stack/linkaddrcache.go4
-rw-r--r--pkg/tcpip/stack/linkaddrcache_test.go4
-rw-r--r--pkg/tcpip/stack/nic.go8
-rw-r--r--pkg/tcpip/stack/registration.go8
-rw-r--r--pkg/tcpip/stack/route.go8
-rw-r--r--pkg/tcpip/stack/stack.go57
-rw-r--r--pkg/tcpip/stack/stack_test.go10
-rw-r--r--pkg/tcpip/stack/transport_demuxer.go8
-rw-r--r--pkg/tcpip/stack/transport_test.go15
-rw-r--r--pkg/tcpip/tcpip.go108
-rw-r--r--pkg/tcpip/transport/icmp/BUILD4
-rw-r--r--pkg/tcpip/transport/icmp/endpoint.go18
-rw-r--r--pkg/tcpip/transport/icmp/endpoint_state.go6
-rw-r--r--pkg/tcpip/transport/icmp/protocol.go12
-rw-r--r--pkg/tcpip/transport/raw/BUILD8
-rw-r--r--pkg/tcpip/transport/raw/endpoint.go20
-rw-r--r--pkg/tcpip/transport/raw/endpoint_state.go6
-rw-r--r--pkg/tcpip/transport/tcp/BUILD5
-rw-r--r--pkg/tcpip/transport/tcp/accept.go28
-rw-r--r--pkg/tcpip/transport/tcp/connect.go64
-rw-r--r--pkg/tcpip/transport/tcp/cubic.go3
-rw-r--r--pkg/tcpip/transport/tcp/cubic_state.go29
-rw-r--r--pkg/tcpip/transport/tcp/dual_stack_test.go18
-rw-r--r--pkg/tcpip/transport/tcp/endpoint.go322
-rw-r--r--pkg/tcpip/transport/tcp/endpoint_state.go27
-rw-r--r--pkg/tcpip/transport/tcp/forwarder.go14
-rw-r--r--pkg/tcpip/transport/tcp/protocol.go72
-rw-r--r--pkg/tcpip/transport/tcp/rcv.go82
-rw-r--r--pkg/tcpip/transport/tcp/sack.go16
-rw-r--r--pkg/tcpip/transport/tcp/sack_scoreboard.go4
-rw-r--r--pkg/tcpip/transport/tcp/sack_scoreboard_test.go6
-rw-r--r--pkg/tcpip/transport/tcp/segment.go8
-rw-r--r--pkg/tcpip/transport/tcp/segment_state.go2
-rw-r--r--pkg/tcpip/transport/tcp/snd.go34
-rw-r--r--pkg/tcpip/transport/tcp/tcp_noracedetector_test.go10
-rw-r--r--pkg/tcpip/transport/tcp/tcp_sack_test.go14
-rw-r--r--pkg/tcpip/transport/tcp/tcp_test.go448
-rw-r--r--pkg/tcpip/transport/tcp/tcp_timestamp_test.go18
-rw-r--r--pkg/tcpip/transport/tcp/testing/context/BUILD2
-rw-r--r--pkg/tcpip/transport/tcp/testing/context/context.go117
-rw-r--r--pkg/tcpip/transport/tcp/timer.go2
-rw-r--r--pkg/tcpip/transport/tcpconntrack/BUILD2
-rw-r--r--pkg/tcpip/transport/tcpconntrack/tcp_conntrack.go4
-rw-r--r--pkg/tcpip/transport/tcpconntrack/tcp_conntrack_test.go4
-rw-r--r--pkg/tcpip/transport/udp/BUILD4
-rw-r--r--pkg/tcpip/transport/udp/endpoint.go55
-rw-r--r--pkg/tcpip/transport/udp/endpoint_state.go10
-rw-r--r--pkg/tcpip/transport/udp/forwarder.go8
-rw-r--r--pkg/tcpip/transport/udp/protocol.go12
-rw-r--r--pkg/tcpip/transport/udp/udp_test.go22
-rw-r--r--pkg/tmutex/BUILD2
-rw-r--r--pkg/unet/BUILD2
-rw-r--r--pkg/unet/unet.go2
-rw-r--r--pkg/unet/unet_test.go42
-rw-r--r--pkg/unet/unet_unsafe.go2
-rw-r--r--pkg/urpc/BUILD2
-rw-r--r--pkg/urpc/urpc.go8
-rw-r--r--pkg/urpc/urpc_test.go2
-rw-r--r--pkg/waiter/BUILD2
-rw-r--r--runsc/BUILD9
-rw-r--r--runsc/boot/BUILD12
-rw-r--r--runsc/boot/compat.go16
-rw-r--r--runsc/boot/compat_amd64.go2
-rw-r--r--runsc/boot/compat_test.go2
-rw-r--r--runsc/boot/config.go47
-rw-r--r--runsc/boot/controller.go45
-rw-r--r--runsc/boot/debug.go2
-rw-r--r--runsc/boot/events.go4
-rw-r--r--runsc/boot/fds.go35
-rw-r--r--runsc/boot/filter/BUILD4
-rw-r--r--runsc/boot/filter/config.go29
-rw-r--r--runsc/boot/filter/extra_filters.go4
-rw-r--r--runsc/boot/filter/extra_filters_msan.go2
-rw-r--r--runsc/boot/filter/extra_filters_race.go3
-rw-r--r--runsc/boot/filter/filter.go19
-rw-r--r--runsc/boot/fs.go324
-rw-r--r--runsc/boot/fs_test.go193
-rw-r--r--runsc/boot/limits.go4
-rw-r--r--runsc/boot/loader.go208
-rw-r--r--runsc/boot/loader_test.go22
-rw-r--r--runsc/boot/network.go22
-rw-r--r--runsc/boot/platforms/BUILD16
-rw-r--r--runsc/boot/platforms/platforms.go30
-rw-r--r--runsc/boot/pprof.go (renamed from pkg/sentry/kernel/kdefs/kdefs.go)10
-rw-r--r--runsc/boot/strace.go2
-rw-r--r--runsc/boot/user.go146
-rw-r--r--runsc/boot/user_test.go253
-rw-r--r--runsc/cgroup/BUILD2
-rw-r--r--runsc/cgroup/cgroup.go4
-rw-r--r--runsc/cmd/BUILD3
-rw-r--r--runsc/cmd/boot.go31
-rw-r--r--runsc/cmd/capability.go2
-rw-r--r--runsc/cmd/capability_test.go19
-rw-r--r--runsc/cmd/checkpoint.go15
-rw-r--r--runsc/cmd/chroot.go4
-rw-r--r--runsc/cmd/cmd.go4
-rw-r--r--runsc/cmd/create.go24
-rw-r--r--runsc/cmd/debug.go99
-rw-r--r--runsc/cmd/delete.go6
-rw-r--r--runsc/cmd/delete_test.go2
-rw-r--r--runsc/cmd/do.go64
-rw-r--r--runsc/cmd/error.go2
-rw-r--r--runsc/cmd/events.go6
-rw-r--r--runsc/cmd/exec.go22
-rw-r--r--runsc/cmd/exec_test.go8
-rw-r--r--runsc/cmd/gofer.go18
-rw-r--r--runsc/cmd/kill.go4
-rw-r--r--runsc/cmd/list.go4
-rw-r--r--runsc/cmd/pause.go4
-rw-r--r--runsc/cmd/ps.go6
-rw-r--r--runsc/cmd/restore.go27
-rw-r--r--runsc/cmd/resume.go4
-rw-r--r--runsc/cmd/run.go25
-rw-r--r--runsc/cmd/start.go4
-rw-r--r--runsc/cmd/state.go6
-rw-r--r--runsc/cmd/syscalls.go4
-rw-r--r--runsc/cmd/wait.go4
-rw-r--r--runsc/console/BUILD6
-rw-r--r--runsc/container/BUILD3
-rw-r--r--runsc/container/console_test.go37
-rw-r--r--runsc/container/container.go125
-rw-r--r--runsc/container/container_test.go307
-rw-r--r--runsc/container/hook.go2
-rw-r--r--runsc/container/multi_container_test.go361
-rw-r--r--runsc/container/shared_volume_test.go22
-rw-r--r--runsc/container/test_app/test_app.go2
-rw-r--r--runsc/fsgofer/BUILD2
-rw-r--r--runsc/fsgofer/filter/BUILD2
-rw-r--r--runsc/fsgofer/filter/config.go4
-rw-r--r--runsc/fsgofer/filter/extra_filters.go4
-rw-r--r--runsc/fsgofer/filter/extra_filters_msan.go4
-rw-r--r--runsc/fsgofer/filter/extra_filters_race.go4
-rw-r--r--runsc/fsgofer/filter/filter.go2
-rw-r--r--runsc/fsgofer/fsgofer.go41
-rw-r--r--runsc/fsgofer/fsgofer_test.go8
-rw-r--r--runsc/fsgofer/fsgofer_unsafe.go4
-rw-r--r--runsc/main.go76
-rw-r--r--runsc/sandbox/BUILD5
-rw-r--r--runsc/sandbox/network.go10
-rw-r--r--runsc/sandbox/sandbox.go232
-rw-r--r--runsc/specutils/BUILD7
-rw-r--r--runsc/specutils/fs.go42
-rw-r--r--runsc/specutils/namespace.go56
-rw-r--r--runsc/specutils/specutils.go8
-rw-r--r--runsc/test/BUILD0
-rw-r--r--runsc/test/build_defs.bzl19
-rw-r--r--runsc/test/image/BUILD7
-rw-r--r--runsc/test/image/image_test.go9
-rw-r--r--runsc/test/integration/BUILD7
-rw-r--r--runsc/test/integration/exec_test.go4
-rw-r--r--runsc/test/integration/integration_test.go12
-rw-r--r--runsc/test/integration/regression_test.go2
-rw-r--r--runsc/test/root/BUILD2
-rw-r--r--runsc/test/root/cgroup_test.go4
-rw-r--r--runsc/test/root/chroot_test.go4
-rw-r--r--runsc/test/root/crictl_test.go6
-rw-r--r--runsc/test/root/testdata/BUILD2
-rw-r--r--runsc/test/testutil/BUILD3
-rw-r--r--runsc/test/testutil/docker.go10
-rw-r--r--runsc/test/testutil/testutil.go55
-rw-r--r--test/syscalls/BUILD2
-rw-r--r--test/syscalls/gtest/BUILD2
-rw-r--r--test/syscalls/linux/BUILD24
-rw-r--r--test/syscalls/linux/chmod.cc8
-rw-r--r--test/syscalls/linux/chown.cc8
-rw-r--r--test/syscalls/linux/chroot.cc3
-rw-r--r--test/syscalls/linux/eventfd.cc9
-rw-r--r--test/syscalls/linux/exceptions.cc34
-rw-r--r--test/syscalls/linux/exec.cc3
-rw-r--r--test/syscalls/linux/exec_proc_exe_workload.cc3
-rw-r--r--test/syscalls/linux/exec_state_workload.cc22
-rw-r--r--test/syscalls/linux/file_base.h4
-rw-r--r--test/syscalls/linux/flock.cc2
-rw-r--r--test/syscalls/linux/getdents.cc15
-rw-r--r--test/syscalls/linux/inotify.cc17
-rw-r--r--test/syscalls/linux/ioctl.cc31
-rw-r--r--test/syscalls/linux/ip_socket_test_util.cc51
-rw-r--r--test/syscalls/linux/itimer.cc7
-rw-r--r--test/syscalls/linux/madvise.cc110
-rw-r--r--test/syscalls/linux/memfd.cc3
-rw-r--r--test/syscalls/linux/mknod.cc6
-rw-r--r--test/syscalls/linux/mmap.cc19
-rw-r--r--test/syscalls/linux/mount.cc8
-rw-r--r--test/syscalls/linux/mremap.cc11
-rw-r--r--test/syscalls/linux/open.cc37
-rw-r--r--test/syscalls/linux/pipe.cc227
-rw-r--r--test/syscalls/linux/proc.cc67
-rw-r--r--test/syscalls/linux/proc_net_tcp.cc281
-rw-r--r--test/syscalls/linux/proc_net_unix.cc12
-rw-r--r--test/syscalls/linux/pty.cc2
-rw-r--r--test/syscalls/linux/raw_socket_icmp.cc2
-rw-r--r--test/syscalls/linux/readv_common.h2
-rw-r--r--test/syscalls/linux/sendfile.cc6
-rw-r--r--test/syscalls/linux/sendfile_socket.cc170
-rw-r--r--test/syscalls/linux/socket_generic.cc50
-rw-r--r--test/syscalls/linux/socket_ip_tcp_generic.cc105
-rw-r--r--test/syscalls/linux/socket_netlink_route.cc45
-rw-r--r--test/syscalls/linux/socket_test_util.cc7
-rw-r--r--test/syscalls/linux/socket_test_util.h7
-rw-r--r--test/syscalls/linux/socket_unix_cmsg.cc4
-rw-r--r--test/syscalls/linux/socket_unix_unbound_stream.cc2
-rw-r--r--test/syscalls/linux/stat.cc99
-rw-r--r--test/syscalls/linux/symlink.cc99
-rw-r--r--test/syscalls/linux/tcp_socket.cc198
-rw-r--r--test/syscalls/linux/udp_socket.cc155
-rw-r--r--test/syscalls/linux/unix_domain_socket_test_util.cc28
-rw-r--r--test/syscalls/linux/unix_domain_socket_test_util.h4
-rw-r--r--test/syscalls/syscall_test_runner.go8
-rw-r--r--test/util/capability_util.h2
-rw-r--r--test/util/fs_util.cc6
-rw-r--r--test/util/fs_util.h20
-rw-r--r--test/util/fs_util_test.cc9
-rw-r--r--test/util/logging.cc2
-rw-r--r--test/util/memory_util.h11
-rw-r--r--test/util/mount_util.h6
-rw-r--r--test/util/posix_error.h2
-rw-r--r--test/util/proc_util.cc3
-rw-r--r--test/util/temp_path.cc7
-rw-r--r--test/util/temp_path.h4
-rw-r--r--test/util/test_util.cc5
-rw-r--r--test/util/timer_util.h2
-rw-r--r--third_party/gvsync/BUILD2
-rw-r--r--third_party/gvsync/atomicptr_unsafe.go12
-rw-r--r--third_party/gvsync/atomicptrtest/BUILD6
-rw-r--r--third_party/gvsync/seqatomic_unsafe.go2
-rw-r--r--third_party/gvsync/seqatomictest/BUILD6
-rw-r--r--third_party/gvsync/seqatomictest/seqatomic_test.go2
-rw-r--r--tools/checkunsafe/BUILD13
-rw-r--r--tools/checkunsafe/check_unsafe.go56
-rwxr-xr-xtools/go_branch.sh2
-rw-r--r--tools/go_generics/generics.go8
-rw-r--r--tools/go_generics/globals/BUILD2
-rw-r--r--tools/go_generics/globals/globals_visitor.go4
-rw-r--r--tools/go_generics/imports.go2
-rw-r--r--tools/go_stateify/defs.bzl4
-rw-r--r--tools/nogo.js7
-rwxr-xr-xtools/run_tests.sh45
-rw-r--r--vdso/check_vdso.py2
973 files changed, 16432 insertions, 7835 deletions
diff --git a/BUILD b/BUILD
index 6d5e800ca..60ed992c4 100644
--- a/BUILD
+++ b/BUILD
@@ -1,6 +1,6 @@
package(licenses = ["notice"]) # Apache 2.0
-load("@io_bazel_rules_go//go:def.bzl", "go_path")
+load("@io_bazel_rules_go//go:def.bzl", "go_path", "nogo")
load("@bazel_gazelle//:def.bzl", "gazelle")
# The sandbox filegroup is used for sandbox-internal dependencies.
@@ -29,3 +29,15 @@ go_path(
# To update the WORKSPACE from go.mod, use:
# bazel run //:gazelle -- update-repos -from_file=go.mod
gazelle(name = "gazelle")
+
+# nogo applies checks to all Go source in this repository, enforcing code
+# guidelines and restrictions. Note that the tool libraries themselves should
+# live in the tools subdirectory (unless they are standard).
+nogo(
+ name = "nogo",
+ config = "tools/nogo.js",
+ visibility = ["//visibility:public"],
+ deps = [
+ "//tools/checkunsafe",
+ ],
+)
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index d888869a9..638942a42 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -88,6 +88,11 @@ Code changes are accepted via [pull request][github].
When approved, the change will be submitted by a team member and automatically
merged into the repository.
+### Presubmit checks
+
+Accessing check logs may require membership in the
+[gvisor-dev mailing list][gvisor-dev-list], which is public.
+
### Bug IDs
Some TODOs and NOTEs sprinkled throughout the code have associated IDs of the
@@ -105,5 +110,6 @@ one above, the
[gcla]: https://cla.developers.google.com/about/google-individual
[gccla]: https://cla.developers.google.com/about/google-corporate
[github]: https://github.com/google/gvisor/compare
+[gvisor-dev-list]: https://groups.google.com/forum/#!forum/gvisor-dev
[gostyle]: https://github.com/golang/go/wiki/CodeReviewComments
[teststyle]: ./test/
diff --git a/Dockerfile b/Dockerfile
index 32ac76f1b..6e9d870db 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,6 +1,6 @@
FROM ubuntu:bionic
-RUN apt-get update && apt-get install -y curl gnupg2 git
+RUN apt-get update && apt-get install -y curl gnupg2 git python3
RUN echo "deb [arch=amd64] http://storage.googleapis.com/bazel-apt stable jdk1.8" | tee /etc/apt/sources.list.d/bazel.list && \
curl https://bazel.build/bazel-release.pub.gpg | apt-key add -
RUN apt-get update && apt-get install -y bazel && apt-get clean
diff --git a/README.md b/README.md
index ba01ffc1e..d102845ac 100644
--- a/README.md
+++ b/README.md
@@ -53,15 +53,6 @@ Make sure the following dependencies are installed:
* [Docker version 17.09.0 or greater][docker]
* Gold linker (e.g. `binutils-gold` package on Ubuntu)
-### Getting the source
-
-Clone the repository:
-
-```
-git clone https://gvisor.googlesource.com/gvisor gvisor
-cd gvisor
-```
-
### Building
Build and install the `runsc` binary:
@@ -84,7 +75,7 @@ sudo cp ./bazel-bin/runsc/linux_amd64_pure_stripped/runsc /usr/local/bin
The test suite can be run with Bazel:
```
-bazel test ...
+bazel test //...
```
or in a Docker container:
@@ -116,6 +107,24 @@ Then invoke bazel with the following flags:
You can also add those flags to your local ~/.bazelrc to avoid needing to
specify them each time on the command line.
+### Using `go get`
+
+This project uses [bazel][bazel] to build and manage dependencies. A synthetic
+`go` branch is maintained that is compatible with standard `go` tooling for
+convenience.
+
+For example, to build `runsc` directly from this branch:
+
+```
+echo "module runsc" > go.mod
+GO111MODULE=on go get gvisor.dev/gvisor/runsc@go
+CGO_ENABLED=0 GO111MODULE=on go install gvisor.dev/gvisor/runsc
+```
+
+Note that this branch is supported in a best effort capacity, and direct
+development on this branch is not supported. Development should occur on the
+`master` branch, which is then reflected into the `go` branch.
+
## Community & Governance
The governance model is documented in our [community][community] repository.
diff --git a/WORKSPACE b/WORKSPACE
index 5155dc527..b57320e7a 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -3,8 +3,11 @@ load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
http_archive(
name = "io_bazel_rules_go",
- sha256 = "a82a352bffae6bee4e95f68a8d80a70e87f42c4741e6a448bec11998fcc82329",
- url = "https://github.com/bazelbuild/rules_go/releases/download/0.18.5/rules_go-0.18.5.tar.gz",
+ sha256 = "45409e6c4f748baa9e05f8f6ab6efaa05739aa064e3ab94e5a1a09849c51806a",
+ urls = [
+ "https://storage.googleapis.com/bazel-mirror/github.com/bazelbuild/rules_go/releases/download/0.18.7/rules_go-0.18.7.tar.gz",
+ "https://github.com/bazelbuild/rules_go/releases/download/0.18.7/rules_go-0.18.7.tar.gz",
+ ],
)
http_archive(
@@ -17,7 +20,10 @@ load("@io_bazel_rules_go//go:deps.bzl", "go_rules_dependencies", "go_register_to
go_rules_dependencies()
-go_register_toolchains(go_version = "1.12.5")
+go_register_toolchains(
+ go_version = "1.12.7",
+ nogo = "@//:nogo",
+)
load("@bazel_gazelle//:deps.bzl", "gazelle_dependencies", "go_repository")
@@ -139,6 +145,12 @@ go_repository(
)
go_repository(
+ name = "org_golang_x_tools",
+ commit = "aa82965741a9fecd12b026fbb3d3c6ed3231b8f8",
+ importpath = "golang.org/x/tools",
+)
+
+go_repository(
name = "com_github_google_btree",
importpath = "github.com/google/btree",
tag = "v1.0.0",
diff --git a/g3doc/README.md b/g3doc/README.md
new file mode 100644
index 000000000..49d58cdae
--- /dev/null
+++ b/g3doc/README.md
@@ -0,0 +1,2 @@
+The gVisor logo files are licensed under CC BY-SA 4.0 (Creative Commons
+Attribution-ShareAlike 4.0 International).
diff --git a/go.mod b/go.mod
index e58b84cfb..821273d22 100644
--- a/go.mod
+++ b/go.mod
@@ -1,4 +1,4 @@
-module gvisor.googlesource.com/gvisor
+module gvisor.dev/gvisor
go 1.12
diff --git a/go.sum b/go.sum
new file mode 100644
index 000000000..7a0bc175a
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,19 @@
+github.com/cenkalti/backoff v0.0.0-20190506075156-2146c9339422/go.mod h1:b6Nc7NRH5C4aCISLry0tLnTjcuTEvoiqcWDdsU0sOGM=
+github.com/gofrs/flock v0.6.1-0.20180915234121-886344bea079/go.mod h1:F1TvTiK9OcQqauNUHlbJvyl9Qa1QvF/gOUDKA14jxHU=
+github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y=
+github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
+github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
+github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
+github.com/google/subcommands v0.0.0-20190508160503-636abe8753b8/go.mod h1:ZjhPrFU+Olkh9WazFPsl27BQ4UPiG37m3yTrtFlrHVk=
+github.com/google/uuid v0.0.0-20171129191014-dec09d789f3d/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
+github.com/opencontainers/runtime-spec v0.1.2-0.20171211145439-b2d941ef6a78/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
+github.com/syndtr/gocapability v0.0.0-20180916011248-d98352740cb2/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww=
+github.com/vishvananda/netlink v1.0.1-0.20190318003149-adb577d4a45e/go.mod h1:+SR5DhBJrl6ZM7CoCKvpw5BKroDKQ+PJqOg65H/2ktk=
+github.com/vishvananda/netns v0.0.0-20171111001504-be1fbeda1936/go.mod h1:ZjcWmFBXmLKZu9Nxj3WKYEafiSqer2rnvPr0en9UNpI=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
diff --git a/pkg/abi/BUILD b/pkg/abi/BUILD
index 323263ebf..32c601a03 100644
--- a/pkg/abi/BUILD
+++ b/pkg/abi/BUILD
@@ -9,6 +9,6 @@ go_library(
"abi_linux.go",
"flag.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/abi",
+ importpath = "gvisor.dev/gvisor/pkg/abi",
visibility = ["//:sandbox"],
)
diff --git a/pkg/abi/linux/BUILD b/pkg/abi/linux/BUILD
index fbd0e4674..2061b38c0 100644
--- a/pkg/abi/linux/BUILD
+++ b/pkg/abi/linux/BUILD
@@ -1,4 +1,4 @@
-# Package linux contains the constants and types needed to inferface with a
+# Package linux contains the constants and types needed to interface with a
# Linux kernel. It should be used instead of syscall or golang.org/x/sys/unix
# when the host OS may not be Linux.
@@ -10,13 +10,13 @@ go_library(
name = "linux",
srcs = [
"aio.go",
- "ashmem.go",
"audit.go",
- "binder.go",
"bpf.go",
"capability.go",
+ "clone.go",
"dev.go",
"elf.go",
+ "epoll.go",
"errors.go",
"eventfd.go",
"exec.go",
@@ -54,7 +54,7 @@ go_library(
"utsname.go",
"wait.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/abi/linux",
+ importpath = "gvisor.dev/gvisor/pkg/abi/linux",
visibility = ["//visibility:public"],
deps = [
"//pkg/abi",
diff --git a/pkg/abi/linux/capability.go b/pkg/abi/linux/capability.go
index 65dd77e6e..965f74663 100644
--- a/pkg/abi/linux/capability.go
+++ b/pkg/abi/linux/capability.go
@@ -60,13 +60,14 @@ const (
CAP_BLOCK_SUSPEND = Capability(36)
CAP_AUDIT_READ = Capability(37)
- // MaxCapability is the highest-numbered capability.
- MaxCapability = CAP_AUDIT_READ
+ // CAP_LAST_CAP is the highest-numbered capability.
+ // Seach for "CAP_LAST_CAP" to find other places that need to change.
+ CAP_LAST_CAP = CAP_AUDIT_READ
)
// Ok returns true if cp is a supported capability.
func (cp Capability) Ok() bool {
- return cp >= 0 && cp <= MaxCapability
+ return cp >= 0 && cp <= CAP_LAST_CAP
}
// String returns the capability name.
diff --git a/pkg/abi/linux/clone.go b/pkg/abi/linux/clone.go
new file mode 100644
index 000000000..c2cbfca5e
--- /dev/null
+++ b/pkg/abi/linux/clone.go
@@ -0,0 +1,41 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// Clone constants per clone(2).
+const (
+ CLONE_VM = 0x100
+ CLONE_FS = 0x200
+ CLONE_FILES = 0x400
+ CLONE_SIGHAND = 0x800
+ CLONE_PARENT = 0x8000
+ CLONE_PTRACE = 0x2000
+ CLONE_VFORK = 0x4000
+ CLONE_THREAD = 0x10000
+ CLONE_NEWNS = 0x20000
+ CLONE_SYSVSEM = 0x40000
+ CLONE_SETTLS = 0x80000
+ CLONE_PARENT_SETTID = 0x100000
+ CLONE_CHILD_CLEARTID = 0x200000
+ CLONE_DETACHED = 0x400000
+ CLONE_UNTRACED = 0x800000
+ CLONE_CHILD_SETTID = 0x1000000
+ CLONE_NEWUTS = 0x4000000
+ CLONE_NEWIPC = 0x8000000
+ CLONE_NEWUSER = 0x10000000
+ CLONE_NEWPID = 0x20000000
+ CLONE_NEWNET = 0x40000000
+ CLONE_IO = 0x80000000
+)
diff --git a/pkg/abi/linux/epoll.go b/pkg/abi/linux/epoll.go
new file mode 100644
index 000000000..72083b604
--- /dev/null
+++ b/pkg/abi/linux/epoll.go
@@ -0,0 +1,56 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// EpollEvent is equivalent to struct epoll_event from epoll(2).
+type EpollEvent struct {
+ Events uint32
+ Fd int32
+ Data int32
+}
+
+// Event masks.
+const (
+ EPOLLIN = 0x1
+ EPOLLPRI = 0x2
+ EPOLLOUT = 0x4
+ EPOLLERR = 0x8
+ EPOLLHUP = 0x10
+ EPOLLRDNORM = 0x40
+ EPOLLRDBAND = 0x80
+ EPOLLWRNORM = 0x100
+ EPOLLWRBAND = 0x200
+ EPOLLMSG = 0x400
+ EPOLLRDHUP = 0x2000
+)
+
+// Per-file descriptor flags.
+const (
+ EPOLLET = 0x80000000
+ EPOLLONESHOT = 0x40000000
+)
+
+// Operation flags.
+const (
+ EPOLL_CLOEXEC = 0x80000
+ EPOLL_NONBLOCK = 0x800
+)
+
+// Control operations.
+const (
+ EPOLL_CTL_ADD = 0x1
+ EPOLL_CTL_DEL = 0x2
+ EPOLL_CTL_MOD = 0x3
+)
diff --git a/pkg/abi/linux/fcntl.go b/pkg/abi/linux/fcntl.go
index b30350193..f78315ebf 100644
--- a/pkg/abi/linux/fcntl.go
+++ b/pkg/abi/linux/fcntl.go
@@ -14,23 +14,41 @@
package linux
-// Comands from linux/fcntl.h.
+// Commands from linux/fcntl.h.
const (
- F_DUPFD = 0
- F_GETFD = 1
- F_GETFL = 3
- F_GETOWN = 9
- F_SETFD = 2
- F_SETFL = 4
- F_SETLK = 6
- F_SETLKW = 7
- F_SETOWN = 8
+ F_DUPFD = 0x0
+ F_GETFD = 0x1
+ F_SETFD = 0x2
+ F_GETFL = 0x3
+ F_SETFL = 0x4
+ F_SETLK = 0x6
+ F_SETLKW = 0x7
+ F_SETOWN = 0x8
+ F_GETOWN = 0x9
F_DUPFD_CLOEXEC = 1024 + 6
F_SETPIPE_SZ = 1024 + 7
F_GETPIPE_SZ = 1024 + 8
)
+// Commands for F_SETLK.
+const (
+ F_RDLCK = 0x0
+ F_WRLCK = 0x1
+ F_UNLCK = 0x2
+)
+
// Flags for fcntl.
const (
FD_CLOEXEC = 00000001
)
+
+// Lock structure for F_SETLK.
+type Flock struct {
+ Type int16
+ Whence int16
+ _ [4]byte
+ Start int64
+ Len int64
+ Pid int32
+ _ [4]byte
+}
diff --git a/pkg/abi/linux/file.go b/pkg/abi/linux/file.go
index 81ff9fe9e..f78ffaa82 100644
--- a/pkg/abi/linux/file.go
+++ b/pkg/abi/linux/file.go
@@ -18,8 +18,8 @@ import (
"fmt"
"strings"
- "gvisor.googlesource.com/gvisor/pkg/abi"
- "gvisor.googlesource.com/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/abi"
+ "gvisor.dev/gvisor/pkg/binary"
)
// Constants for open(2).
@@ -161,26 +161,90 @@ const (
// Stat represents struct stat.
type Stat struct {
- Dev uint64
- Ino uint64
- Nlink uint64
- Mode uint32
- UID uint32
- GID uint32
- X_pad0 int32
- Rdev uint64
- Size int64
- Blksize int64
- Blocks int64
- ATime Timespec
- MTime Timespec
- CTime Timespec
- X_unused [3]int64
+ Dev uint64
+ Ino uint64
+ Nlink uint64
+ Mode uint32
+ UID uint32
+ GID uint32
+ _ int32
+ Rdev uint64
+ Size int64
+ Blksize int64
+ Blocks int64
+ ATime Timespec
+ MTime Timespec
+ CTime Timespec
+ _ [3]int64
}
+// File types.
+const (
+ DT_BLK = 0x6
+ DT_CHR = 0x2
+ DT_DIR = 0x4
+ DT_FIFO = 0x1
+ DT_LNK = 0xa
+ DT_REG = 0x8
+ DT_SOCK = 0xc
+ DT_UNKNOWN = 0x0
+ DT_WHT = 0xe
+)
+
// SizeOfStat is the size of a Stat struct.
var SizeOfStat = binary.Size(Stat{})
+// Flags for statx.
+const (
+ AT_STATX_SYNC_TYPE = 0x6000
+ AT_STATX_SYNC_AS_STAT = 0x0000
+ AT_STATX_FORCE_SYNC = 0x2000
+ AT_STATX_DONT_SYNC = 0x4000
+)
+
+// Mask values for statx.
+const (
+ STATX_TYPE = 0x00000001
+ STATX_MODE = 0x00000002
+ STATX_NLINK = 0x00000004
+ STATX_UID = 0x00000008
+ STATX_GID = 0x00000010
+ STATX_ATIME = 0x00000020
+ STATX_MTIME = 0x00000040
+ STATX_CTIME = 0x00000080
+ STATX_INO = 0x00000100
+ STATX_SIZE = 0x00000200
+ STATX_BLOCKS = 0x00000400
+ STATX_BASIC_STATS = 0x000007ff
+ STATX_BTIME = 0x00000800
+ STATX_ALL = 0x00000fff
+ STATX__RESERVED = 0x80000000
+)
+
+// Statx represents struct statx.
+type Statx struct {
+ Mask uint32
+ Blksize uint32
+ Attributes uint64
+ Nlink uint32
+ UID uint32
+ GID uint32
+ Mode uint16
+ _ uint16
+ Ino uint64
+ Size uint64
+ Blocks uint64
+ AttributesMask uint64
+ Atime StatxTimestamp
+ Btime StatxTimestamp
+ Ctime StatxTimestamp
+ Mtime StatxTimestamp
+ RdevMajor uint32
+ RdevMinor uint32
+ DevMajor uint32
+ DevMinor uint32
+}
+
// FileMode represents a mode_t.
type FileMode uint
diff --git a/pkg/abi/linux/ioctl.go b/pkg/abi/linux/ioctl.go
index 04bb767dc..0e18db9ef 100644
--- a/pkg/abi/linux/ioctl.go
+++ b/pkg/abi/linux/ioctl.go
@@ -72,28 +72,3 @@ const (
SIOCGMIIPHY = 0x8947
SIOCGMIIREG = 0x8948
)
-
-// ioctl(2) requests provided by uapi/linux/android/binder.h
-const (
- BinderWriteReadIoctl = 0xc0306201
- BinderSetIdleTimeoutIoctl = 0x40086203
- BinderSetMaxThreadsIoctl = 0x40046205
- BinderSetIdlePriorityIoctl = 0x40046206
- BinderSetContextMgrIoctl = 0x40046207
- BinderThreadExitIoctl = 0x40046208
- BinderVersionIoctl = 0xc0046209
-)
-
-// ioctl(2) requests provided by drivers/staging/android/uapi/ashmem.h
-const (
- AshmemSetNameIoctl = 0x41007701
- AshmemGetNameIoctl = 0x81007702
- AshmemSetSizeIoctl = 0x40087703
- AshmemGetSizeIoctl = 0x00007704
- AshmemSetProtMaskIoctl = 0x40087705
- AshmemGetProtMaskIoctl = 0x00007706
- AshmemPinIoctl = 0x40087707
- AshmemUnpinIoctl = 0x40087708
- AshmemGetPinStatusIoctl = 0x00007709
- AshmemPurgeAllCachesIoctl = 0x0000770a
-)
diff --git a/pkg/abi/linux/ipc.go b/pkg/abi/linux/ipc.go
index 2ef8d6cbb..22acd2d43 100644
--- a/pkg/abi/linux/ipc.go
+++ b/pkg/abi/linux/ipc.go
@@ -44,10 +44,10 @@ type IPCPerm struct {
CUID uint32
CGID uint32
Mode uint16
- pad1 uint16
+ _ uint16
Seq uint16
- pad2 uint16
- pad3 uint32
+ _ uint16
+ _ uint32
unused1 uint64
unused2 uint64
}
diff --git a/pkg/abi/linux/linux.go b/pkg/abi/linux/linux.go
index 8a8f831cd..281acdbde 100644
--- a/pkg/abi/linux/linux.go
+++ b/pkg/abi/linux/linux.go
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-// Package linux contains the constants and types needed to inferface with a Linux kernel.
+// Package linux contains the constants and types needed to interface with a Linux kernel.
package linux
// NumSoftIRQ is the number of software IRQs, exposed via /proc/stat.
diff --git a/pkg/abi/linux/netdevice.go b/pkg/abi/linux/netdevice.go
index aef1acf75..7866352b4 100644
--- a/pkg/abi/linux/netdevice.go
+++ b/pkg/abi/linux/netdevice.go
@@ -14,7 +14,7 @@
package linux
-import "gvisor.googlesource.com/gvisor/pkg/binary"
+import "gvisor.dev/gvisor/pkg/binary"
const (
// IFNAMSIZ is the size of the name field for IFReq.
diff --git a/pkg/abi/linux/netlink.go b/pkg/abi/linux/netlink.go
index 5e718c363..e8b6544b4 100644
--- a/pkg/abi/linux/netlink.go
+++ b/pkg/abi/linux/netlink.go
@@ -41,10 +41,10 @@ const (
// SockAddrNetlink is struct sockaddr_nl, from uapi/linux/netlink.h.
type SockAddrNetlink struct {
- Family uint16
- Padding uint16
- PortID uint32
- Groups uint32
+ Family uint16
+ _ uint16
+ PortID uint32
+ Groups uint32
}
// SockAddrNetlinkSize is the size of SockAddrNetlink.
diff --git a/pkg/abi/linux/netlink_route.go b/pkg/abi/linux/netlink_route.go
index 630dc339a..dd698e2bc 100644
--- a/pkg/abi/linux/netlink_route.go
+++ b/pkg/abi/linux/netlink_route.go
@@ -86,12 +86,12 @@ const (
// InterfaceInfoMessage is struct ifinfomsg, from uapi/linux/rtnetlink.h.
type InterfaceInfoMessage struct {
- Family uint8
- Padding uint8
- Type uint16
- Index int32
- Flags uint32
- Change uint32
+ Family uint8
+ _ uint8
+ Type uint16
+ Index int32
+ Flags uint32
+ Change uint32
}
// Interface flags, from uapi/linux/if.h.
diff --git a/pkg/abi/linux/sched.go b/pkg/abi/linux/sched.go
index 193d9a242..70e820823 100644
--- a/pkg/abi/linux/sched.go
+++ b/pkg/abi/linux/sched.go
@@ -28,3 +28,9 @@ const (
// reverted back to SCHED_NORMAL on fork.
SCHED_RESET_ON_FORK = 0x40000000
)
+
+const (
+ PRIO_PGRP = 0x1
+ PRIO_PROCESS = 0x0
+ PRIO_USER = 0x2
+)
diff --git a/pkg/abi/linux/signal.go b/pkg/abi/linux/signal.go
index 9cbd77dda..c69b04ea9 100644
--- a/pkg/abi/linux/signal.go
+++ b/pkg/abi/linux/signal.go
@@ -15,7 +15,7 @@
package linux
import (
- "gvisor.googlesource.com/gvisor/pkg/bits"
+ "gvisor.dev/gvisor/pkg/bits"
)
const (
diff --git a/pkg/abi/linux/socket.go b/pkg/abi/linux/socket.go
index a714ac86d..6d22002c4 100644
--- a/pkg/abi/linux/socket.go
+++ b/pkg/abi/linux/socket.go
@@ -14,7 +14,7 @@
package linux
-import "gvisor.googlesource.com/gvisor/pkg/binary"
+import "gvisor.dev/gvisor/pkg/binary"
// Address families, from linux/socket.h.
const (
diff --git a/pkg/abi/linux/time.go b/pkg/abi/linux/time.go
index fa9ee27e1..546668bca 100644
--- a/pkg/abi/linux/time.go
+++ b/pkg/abi/linux/time.go
@@ -226,3 +226,24 @@ type Tms struct {
// TimerID represents type timer_t, which identifies a POSIX per-process
// interval timer.
type TimerID int32
+
+// StatxTimestamp represents struct statx_timestamp.
+type StatxTimestamp struct {
+ Sec int64
+ Nsec uint32
+ _ int32
+}
+
+// NsecToStatxTimestamp translates nanoseconds to StatxTimestamp.
+func NsecToStatxTimestamp(nsec int64) (ts StatxTimestamp) {
+ return StatxTimestamp{
+ Sec: nsec / 1e9,
+ Nsec: uint32(nsec % 1e9),
+ }
+}
+
+// Utime represents struct utimbuf used by utimes(2).
+type Utime struct {
+ Actime int64
+ Modtime int64
+}
diff --git a/pkg/amutex/BUILD b/pkg/amutex/BUILD
index bdb6e8f2c..39d253b98 100644
--- a/pkg/amutex/BUILD
+++ b/pkg/amutex/BUILD
@@ -5,7 +5,7 @@ package(licenses = ["notice"])
go_library(
name = "amutex",
srcs = ["amutex.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/amutex",
+ importpath = "gvisor.dev/gvisor/pkg/amutex",
visibility = ["//:sandbox"],
)
@@ -14,5 +14,4 @@ go_test(
size = "small",
srcs = ["amutex_test.go"],
embed = [":amutex"],
- tags = ["flaky"],
)
diff --git a/pkg/amutex/amutex.go b/pkg/amutex/amutex.go
index 4f7759b87..1c4fd1784 100644
--- a/pkg/amutex/amutex.go
+++ b/pkg/amutex/amutex.go
@@ -21,13 +21,13 @@ import (
)
// Sleeper must be implemented by users of the abortable mutex to allow for
-// cancelation of waits.
+// cancellation of waits.
type Sleeper interface {
// SleepStart is called by the AbortableMutex.Lock() function when the
// mutex is contended and the goroutine is about to sleep.
//
// A channel can be returned that causes the sleep to be canceled if
- // it's readable. If no cancelation is desired, nil can be returned.
+ // it's readable. If no cancellation is desired, nil can be returned.
SleepStart() <-chan struct{}
// SleepFinish is called by AbortableMutex.Lock() once a contended mutex
diff --git a/pkg/amutex/amutex_test.go b/pkg/amutex/amutex_test.go
index 211bdda4b..1d7f45641 100644
--- a/pkg/amutex/amutex_test.go
+++ b/pkg/amutex/amutex_test.go
@@ -47,7 +47,7 @@ func TestMutualExclusion(t *testing.T) {
// goroutines ran concurrently within the critical section.
//
// If one of the goroutines doesn't complete, it's likely a bug that
- // causes to to wait forever.
+ // causes it to wait forever.
const gr = 1000
const iters = 100000
v := 0
diff --git a/pkg/atomicbitops/BUILD b/pkg/atomicbitops/BUILD
index 9555bf645..47ab65346 100644
--- a/pkg/atomicbitops/BUILD
+++ b/pkg/atomicbitops/BUILD
@@ -9,7 +9,7 @@ go_library(
"atomic_bitops_amd64.s",
"atomic_bitops_common.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/atomicbitops",
+ importpath = "gvisor.dev/gvisor/pkg/atomicbitops",
visibility = ["//:sandbox"],
)
diff --git a/pkg/binary/BUILD b/pkg/binary/BUILD
index bd37376b0..09d6c2c1f 100644
--- a/pkg/binary/BUILD
+++ b/pkg/binary/BUILD
@@ -5,7 +5,7 @@ package(licenses = ["notice"])
go_library(
name = "binary",
srcs = ["binary.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/binary",
+ importpath = "gvisor.dev/gvisor/pkg/binary",
visibility = ["//:sandbox"],
)
diff --git a/pkg/bits/BUILD b/pkg/bits/BUILD
index 5214b2c24..0c2dde4f8 100644
--- a/pkg/bits/BUILD
+++ b/pkg/bits/BUILD
@@ -14,7 +14,7 @@ go_library(
"uint64_arch_amd64_asm.s",
"uint64_arch_generic.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/bits",
+ importpath = "gvisor.dev/gvisor/pkg/bits",
visibility = ["//:sandbox"],
)
diff --git a/pkg/bpf/BUILD b/pkg/bpf/BUILD
index 3c7ae3103..b692aa3b1 100644
--- a/pkg/bpf/BUILD
+++ b/pkg/bpf/BUILD
@@ -11,7 +11,7 @@ go_library(
"interpreter.go",
"program_builder.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/bpf",
+ importpath = "gvisor.dev/gvisor/pkg/bpf",
visibility = ["//visibility:public"],
deps = ["//pkg/abi/linux"],
)
diff --git a/pkg/bpf/bpf.go b/pkg/bpf/bpf.go
index eb546f48f..b8b8ad372 100644
--- a/pkg/bpf/bpf.go
+++ b/pkg/bpf/bpf.go
@@ -17,7 +17,7 @@
// https://www.freebsd.org/cgi/man.cgi?bpf(4)
package bpf
-import "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+import "gvisor.dev/gvisor/pkg/abi/linux"
const (
// MaxInstructions is the maximum number of instructions in a BPF program,
diff --git a/pkg/bpf/decoder.go b/pkg/bpf/decoder.go
index 45c192215..c8ee0c3b1 100644
--- a/pkg/bpf/decoder.go
+++ b/pkg/bpf/decoder.go
@@ -18,7 +18,7 @@ import (
"bytes"
"fmt"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/abi/linux"
)
// DecodeProgram translates an array of BPF instructions into text format.
diff --git a/pkg/bpf/decoder_test.go b/pkg/bpf/decoder_test.go
index 8c4bdad21..6a023f0c0 100644
--- a/pkg/bpf/decoder_test.go
+++ b/pkg/bpf/decoder_test.go
@@ -17,7 +17,7 @@ package bpf
import (
"testing"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/abi/linux"
)
func TestDecode(t *testing.T) {
diff --git a/pkg/bpf/interpreter.go b/pkg/bpf/interpreter.go
index 86de523a2..ed27abb9b 100644
--- a/pkg/bpf/interpreter.go
+++ b/pkg/bpf/interpreter.go
@@ -17,7 +17,7 @@ package bpf
import (
"fmt"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/abi/linux"
)
// Possible values for ProgramError.Code.
diff --git a/pkg/bpf/interpreter_test.go b/pkg/bpf/interpreter_test.go
index 67b00ffe3..547921d0a 100644
--- a/pkg/bpf/interpreter_test.go
+++ b/pkg/bpf/interpreter_test.go
@@ -17,8 +17,8 @@ package bpf
import (
"testing"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/binary"
)
func TestCompilationErrors(t *testing.T) {
diff --git a/pkg/bpf/program_builder.go b/pkg/bpf/program_builder.go
index fc9d27203..7992044d0 100644
--- a/pkg/bpf/program_builder.go
+++ b/pkg/bpf/program_builder.go
@@ -18,7 +18,7 @@ import (
"fmt"
"math"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/abi/linux"
)
const (
diff --git a/pkg/bpf/program_builder_test.go b/pkg/bpf/program_builder_test.go
index 5b2ad67de..92ca5f4c3 100644
--- a/pkg/bpf/program_builder_test.go
+++ b/pkg/bpf/program_builder_test.go
@@ -18,7 +18,7 @@ import (
"fmt"
"testing"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/abi/linux"
)
func validate(p *ProgramBuilder, expected []linux.BPFInstruction) error {
diff --git a/pkg/compressio/BUILD b/pkg/compressio/BUILD
index 3a0ac64e6..cdec96df1 100644
--- a/pkg/compressio/BUILD
+++ b/pkg/compressio/BUILD
@@ -5,7 +5,7 @@ package(licenses = ["notice"])
go_library(
name = "compressio",
srcs = ["compressio.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/compressio",
+ importpath = "gvisor.dev/gvisor/pkg/compressio",
visibility = ["//:sandbox"],
deps = ["//pkg/binary"],
)
diff --git a/pkg/compressio/compressio.go b/pkg/compressio/compressio.go
index 8c14ccbfa..3b0bb086e 100644
--- a/pkg/compressio/compressio.go
+++ b/pkg/compressio/compressio.go
@@ -54,7 +54,7 @@ import (
"runtime"
"sync"
- "gvisor.googlesource.com/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/binary"
)
var bufPool = sync.Pool{
diff --git a/pkg/control/client/BUILD b/pkg/control/client/BUILD
index 22a4a4a5a..066d7b1a1 100644
--- a/pkg/control/client/BUILD
+++ b/pkg/control/client/BUILD
@@ -7,7 +7,7 @@ go_library(
srcs = [
"client.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/control/client",
+ importpath = "gvisor.dev/gvisor/pkg/control/client",
visibility = ["//:sandbox"],
deps = [
"//pkg/unet",
diff --git a/pkg/control/client/client.go b/pkg/control/client/client.go
index 3fec27846..41807cd45 100644
--- a/pkg/control/client/client.go
+++ b/pkg/control/client/client.go
@@ -16,8 +16,8 @@
package client
import (
- "gvisor.googlesource.com/gvisor/pkg/unet"
- "gvisor.googlesource.com/gvisor/pkg/urpc"
+ "gvisor.dev/gvisor/pkg/unet"
+ "gvisor.dev/gvisor/pkg/urpc"
)
// ConnectTo attempts to connect to the sandbox with the given address.
diff --git a/pkg/control/server/BUILD b/pkg/control/server/BUILD
index 76b2e9787..21adf3adf 100644
--- a/pkg/control/server/BUILD
+++ b/pkg/control/server/BUILD
@@ -5,7 +5,7 @@ package(licenses = ["notice"])
go_library(
name = "server",
srcs = ["server.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/control/server",
+ importpath = "gvisor.dev/gvisor/pkg/control/server",
visibility = ["//:sandbox"],
deps = [
"//pkg/log",
diff --git a/pkg/control/server/server.go b/pkg/control/server/server.go
index 1a15da1a8..a56152d10 100644
--- a/pkg/control/server/server.go
+++ b/pkg/control/server/server.go
@@ -24,9 +24,9 @@ import (
"os"
"sync"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/unet"
- "gvisor.googlesource.com/gvisor/pkg/urpc"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/unet"
+ "gvisor.dev/gvisor/pkg/urpc"
)
// curUID is the unix user ID of the user that the control server is running as.
diff --git a/pkg/cpuid/BUILD b/pkg/cpuid/BUILD
index 29cc38778..830e19e07 100644
--- a/pkg/cpuid/BUILD
+++ b/pkg/cpuid/BUILD
@@ -8,7 +8,7 @@ go_library(
"cpu_amd64.s",
"cpuid.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/cpuid",
+ importpath = "gvisor.dev/gvisor/pkg/cpuid",
visibility = ["//:sandbox"],
deps = ["//pkg/log"],
)
diff --git a/pkg/cpuid/cpuid.go b/pkg/cpuid/cpuid.go
index 3eb2bcd2b..3fabaf445 100644
--- a/pkg/cpuid/cpuid.go
+++ b/pkg/cpuid/cpuid.go
@@ -36,7 +36,7 @@ import (
"strconv"
"strings"
- "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/log"
)
// Common references for CPUID leaves and bits:
@@ -620,7 +620,7 @@ func vendorIDFromRegs(bx, cx, dx uint32) string {
// state includes floating point registers, and other cpu state that's not
// associated with the normal task context.
//
-// Note: We can save some space here with an optimiazation where we use a
+// Note: We can save some space here with an optimization where we use a
// smaller chunk of memory depending on features that are actually enabled.
// Currently we just use the largest possible size for simplicity (which is
// about 2.5K worst case, with avx512).
diff --git a/pkg/eventchannel/BUILD b/pkg/eventchannel/BUILD
index d1483db0a..4c336ea84 100644
--- a/pkg/eventchannel/BUILD
+++ b/pkg/eventchannel/BUILD
@@ -8,7 +8,7 @@ go_library(
srcs = [
"event.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/eventchannel",
+ importpath = "gvisor.dev/gvisor/pkg/eventchannel",
visibility = ["//:sandbox"],
deps = [
":eventchannel_go_proto",
@@ -26,7 +26,7 @@ proto_library(
go_proto_library(
name = "eventchannel_go_proto",
- importpath = "gvisor.googlesource.com/gvisor/pkg/eventchannel/eventchannel_go_proto",
+ importpath = "gvisor.dev/gvisor/pkg/eventchannel/eventchannel_go_proto",
proto = ":eventchannel_proto",
visibility = ["//:sandbox"],
)
diff --git a/pkg/eventchannel/event.go b/pkg/eventchannel/event.go
index 4c8ae573b..f6d26532b 100644
--- a/pkg/eventchannel/event.go
+++ b/pkg/eventchannel/event.go
@@ -27,9 +27,9 @@ import (
"github.com/golang/protobuf/proto"
"github.com/golang/protobuf/ptypes"
- pb "gvisor.googlesource.com/gvisor/pkg/eventchannel/eventchannel_go_proto"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/unet"
+ pb "gvisor.dev/gvisor/pkg/eventchannel/eventchannel_go_proto"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/unet"
)
// Emitter emits a proto message.
@@ -144,7 +144,7 @@ type debugEmitter struct {
inner Emitter
}
-// DebugEmitterFrom creates a new event channel emitter by wraping an existing
+// DebugEmitterFrom creates a new event channel emitter by wrapping an existing
// raw emitter.
func DebugEmitterFrom(inner Emitter) Emitter {
return &debugEmitter{
diff --git a/pkg/fd/BUILD b/pkg/fd/BUILD
index ab1109157..785c685a0 100644
--- a/pkg/fd/BUILD
+++ b/pkg/fd/BUILD
@@ -5,7 +5,7 @@ package(licenses = ["notice"])
go_library(
name = "fd",
srcs = ["fd.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/fd",
+ importpath = "gvisor.dev/gvisor/pkg/fd",
visibility = ["//visibility:public"],
)
diff --git a/pkg/fd/fd.go b/pkg/fd/fd.go
index 2785243a2..83bcfe220 100644
--- a/pkg/fd/fd.go
+++ b/pkg/fd/fd.go
@@ -167,7 +167,7 @@ func NewFromFile(file *os.File) (*FD, error) {
return New(fd), nil
}
-// Open is equivallent to open(2).
+// Open is equivalent to open(2).
func Open(path string, openmode int, perm uint32) (*FD, error) {
f, err := syscall.Open(path, openmode|syscall.O_LARGEFILE, perm)
if err != nil {
@@ -176,7 +176,7 @@ func Open(path string, openmode int, perm uint32) (*FD, error) {
return New(f), nil
}
-// OpenAt is equivallent to openat(2).
+// OpenAt is equivalent to openat(2).
func OpenAt(dir *FD, path string, flags int, mode uint32) (*FD, error) {
f, err := syscall.Openat(dir.FD(), path, flags, mode)
if err != nil {
diff --git a/pkg/fdchannel/BUILD b/pkg/fdchannel/BUILD
new file mode 100644
index 000000000..e54e7371c
--- /dev/null
+++ b/pkg/fdchannel/BUILD
@@ -0,0 +1,17 @@
+load("//tools/go_stateify:defs.bzl", "go_library", "go_test")
+
+package(licenses = ["notice"])
+
+go_library(
+ name = "fdchannel",
+ srcs = ["fdchannel_unsafe.go"],
+ importpath = "gvisor.dev/gvisor/pkg/fdchannel",
+ visibility = ["//visibility:public"],
+)
+
+go_test(
+ name = "fdchannel_test",
+ size = "small",
+ srcs = ["fdchannel_test.go"],
+ embed = [":fdchannel"],
+)
diff --git a/pkg/fdchannel/fdchannel_test.go b/pkg/fdchannel/fdchannel_test.go
new file mode 100644
index 000000000..5d01dc636
--- /dev/null
+++ b/pkg/fdchannel/fdchannel_test.go
@@ -0,0 +1,131 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fdchannel
+
+import (
+ "io/ioutil"
+ "os"
+ "sync"
+ "syscall"
+ "testing"
+ "time"
+)
+
+func TestSendRecvFD(t *testing.T) {
+ sendFile, err := ioutil.TempFile("", "fdchannel_test_")
+ if err != nil {
+ t.Fatalf("failed to create temporary file: %v", err)
+ }
+ defer sendFile.Close()
+
+ chanFDs, err := NewConnectedSockets()
+ if err != nil {
+ t.Fatalf("failed to create fdchannel sockets: %v", err)
+ }
+ sendEP := NewEndpoint(chanFDs[0])
+ defer sendEP.Destroy()
+ recvEP := NewEndpoint(chanFDs[1])
+ defer recvEP.Destroy()
+
+ recvFD, err := recvEP.RecvFDNonblock()
+ if err != syscall.EAGAIN && err != syscall.EWOULDBLOCK {
+ t.Errorf("RecvFDNonblock before SendFD: got (%d, %v), wanted (<unspecified>, EAGAIN or EWOULDBLOCK", recvFD, err)
+ }
+
+ if err := sendEP.SendFD(int(sendFile.Fd())); err != nil {
+ t.Fatalf("SendFD failed: %v", err)
+ }
+ recvFD, err = recvEP.RecvFD()
+ if err != nil {
+ t.Fatalf("RecvFD failed: %v", err)
+ }
+ recvFile := os.NewFile(uintptr(recvFD), "received file")
+ defer recvFile.Close()
+
+ sendInfo, err := sendFile.Stat()
+ if err != nil {
+ t.Fatalf("failed to stat sent file: %v", err)
+ }
+ sendInfoSys := sendInfo.Sys()
+ sendStat, ok := sendInfoSys.(*syscall.Stat_t)
+ if !ok {
+ t.Fatalf("sent file's FileInfo is backed by unknown type %T", sendInfoSys)
+ }
+
+ recvInfo, err := recvFile.Stat()
+ if err != nil {
+ t.Fatalf("failed to stat received file: %v", err)
+ }
+ recvInfoSys := recvInfo.Sys()
+ recvStat, ok := recvInfoSys.(*syscall.Stat_t)
+ if !ok {
+ t.Fatalf("received file's FileInfo is backed by unknown type %T", recvInfoSys)
+ }
+
+ if sendStat.Dev != recvStat.Dev || sendStat.Ino != recvStat.Ino {
+ t.Errorf("sent file (dev=%d, ino=%d) does not match received file (dev=%d, ino=%d)", sendStat.Dev, sendStat.Ino, recvStat.Dev, recvStat.Ino)
+ }
+}
+
+func TestShutdownThenRecvFD(t *testing.T) {
+ sendFile, err := ioutil.TempFile("", "fdchannel_test_")
+ if err != nil {
+ t.Fatalf("failed to create temporary file: %v", err)
+ }
+ defer sendFile.Close()
+
+ chanFDs, err := NewConnectedSockets()
+ if err != nil {
+ t.Fatalf("failed to create fdchannel sockets: %v", err)
+ }
+ sendEP := NewEndpoint(chanFDs[0])
+ defer sendEP.Destroy()
+ recvEP := NewEndpoint(chanFDs[1])
+ defer recvEP.Destroy()
+
+ recvEP.Shutdown()
+ if _, err := recvEP.RecvFD(); err == nil {
+ t.Error("RecvFD succeeded unexpectedly")
+ }
+}
+
+func TestRecvFDThenShutdown(t *testing.T) {
+ sendFile, err := ioutil.TempFile("", "fdchannel_test_")
+ if err != nil {
+ t.Fatalf("failed to create temporary file: %v", err)
+ }
+ defer sendFile.Close()
+
+ chanFDs, err := NewConnectedSockets()
+ if err != nil {
+ t.Fatalf("failed to create fdchannel sockets: %v", err)
+ }
+ sendEP := NewEndpoint(chanFDs[0])
+ defer sendEP.Destroy()
+ recvEP := NewEndpoint(chanFDs[1])
+ defer recvEP.Destroy()
+
+ var receiverWG sync.WaitGroup
+ receiverWG.Add(1)
+ go func() {
+ defer receiverWG.Done()
+ if _, err := recvEP.RecvFD(); err == nil {
+ t.Error("RecvFD succeeded unexpectedly")
+ }
+ }()
+ defer receiverWG.Wait()
+ time.Sleep(time.Second) // to ensure recvEP.RecvFD() has blocked
+ recvEP.Shutdown()
+}
diff --git a/pkg/fdchannel/fdchannel_unsafe.go b/pkg/fdchannel/fdchannel_unsafe.go
new file mode 100644
index 000000000..367235be5
--- /dev/null
+++ b/pkg/fdchannel/fdchannel_unsafe.go
@@ -0,0 +1,146 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris
+
+// Package fdchannel implements passing file descriptors between processes over
+// Unix domain sockets.
+package fdchannel
+
+import (
+ "fmt"
+ "reflect"
+ "sync/atomic"
+ "syscall"
+ "unsafe"
+)
+
+// int32 is the real type of a file descriptor.
+const sizeofInt32 = int(unsafe.Sizeof(int32(0)))
+
+// NewConnectedSockets returns a pair of file descriptors, owned by the caller,
+// representing connected sockets that may be passed to separate calls to
+// NewEndpoint to create connected Endpoints.
+func NewConnectedSockets() ([2]int, error) {
+ return syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_SEQPACKET|syscall.SOCK_CLOEXEC, 0)
+}
+
+// Endpoint sends file descriptors to, and receives them from, another
+// connected Endpoint.
+//
+// Endpoint is not copyable or movable by value.
+type Endpoint struct {
+ sockfd int32 // accessed using atomic memory operations
+ msghdr syscall.Msghdr
+ cmsg *syscall.Cmsghdr // followed by sizeofInt32 bytes of data
+}
+
+// Init must be called on zero-value Endpoints before first use. sockfd must be
+// a blocking AF_UNIX SOCK_SEQPACKET socket.
+func (ep *Endpoint) Init(sockfd int) {
+ // "Datagram sockets in various domains (e.g., the UNIX and Internet
+ // domains) permit zero-length datagrams." - recv(2). Experimentally,
+ // sendmsg+recvmsg for a zero-length datagram is slightly faster than
+ // sendmsg+recvmsg for a single byte over a stream socket.
+ cmsgSlice := make([]byte, syscall.CmsgSpace(sizeofInt32))
+ cmsgReflect := (*reflect.SliceHeader)((unsafe.Pointer)(&cmsgSlice))
+ ep.sockfd = int32(sockfd)
+ ep.msghdr.Control = (*byte)((unsafe.Pointer)(cmsgReflect.Data))
+ ep.cmsg = (*syscall.Cmsghdr)((unsafe.Pointer)(cmsgReflect.Data))
+ // ep.msghdr.Controllen and ep.cmsg.* are mutated by recvmsg(2), so they're
+ // set before calling sendmsg/recvmsg.
+}
+
+// NewEndpoint is a convenience function that returns an initialized Endpoint
+// allocated on the heap.
+func NewEndpoint(sockfd int) *Endpoint {
+ ep := &Endpoint{}
+ ep.Init(sockfd)
+ return ep
+}
+
+// Destroy releases resources owned by ep. No other Endpoint methods may be
+// called after Destroy.
+func (ep *Endpoint) Destroy() {
+ // These need not use sync/atomic since there must not be any concurrent
+ // calls to Endpoint methods.
+ if ep.sockfd >= 0 {
+ syscall.Close(int(ep.sockfd))
+ ep.sockfd = -1
+ }
+}
+
+// Shutdown causes concurrent and future calls to ep.SendFD(), ep.RecvFD(), and
+// ep.RecvFDNonblock(), as well as the same calls in the connected Endpoint, to
+// unblock and return errors. It does not wait for concurrent calls to return.
+//
+// Shutdown is the only Endpoint method that may be called concurrently with
+// other methods.
+func (ep *Endpoint) Shutdown() {
+ if sockfd := int(atomic.SwapInt32(&ep.sockfd, -1)); sockfd >= 0 {
+ syscall.Shutdown(sockfd, syscall.SHUT_RDWR)
+ syscall.Close(sockfd)
+ }
+}
+
+// SendFD sends the open file description represented by the given file
+// descriptor to the connected Endpoint.
+func (ep *Endpoint) SendFD(fd int) error {
+ cmsgLen := syscall.CmsgLen(sizeofInt32)
+ ep.cmsg.Level = syscall.SOL_SOCKET
+ ep.cmsg.Type = syscall.SCM_RIGHTS
+ ep.cmsg.SetLen(cmsgLen)
+ *ep.cmsgData() = int32(fd)
+ ep.msghdr.SetControllen(cmsgLen)
+ _, _, e := syscall.Syscall(syscall.SYS_SENDMSG, uintptr(atomic.LoadInt32(&ep.sockfd)), uintptr((unsafe.Pointer)(&ep.msghdr)), 0)
+ if e != 0 {
+ return e
+ }
+ return nil
+}
+
+// RecvFD receives an open file description from the connected Endpoint and
+// returns a file descriptor representing it, owned by the caller.
+func (ep *Endpoint) RecvFD() (int, error) {
+ return ep.recvFD(0)
+}
+
+// RecvFDNonblock receives an open file description from the connected Endpoint
+// and returns a file descriptor representing it, owned by the caller. If there
+// are no pending receivable open file descriptions, RecvFDNonblock returns
+// (<unspecified>, EAGAIN or EWOULDBLOCK).
+func (ep *Endpoint) RecvFDNonblock() (int, error) {
+ return ep.recvFD(syscall.MSG_DONTWAIT)
+}
+
+func (ep *Endpoint) recvFD(flags uintptr) (int, error) {
+ cmsgLen := syscall.CmsgLen(sizeofInt32)
+ ep.msghdr.SetControllen(cmsgLen)
+ _, _, e := syscall.Syscall(syscall.SYS_RECVMSG, uintptr(atomic.LoadInt32(&ep.sockfd)), uintptr((unsafe.Pointer)(&ep.msghdr)), flags|syscall.MSG_TRUNC)
+ if e != 0 {
+ return -1, e
+ }
+ if int(ep.msghdr.Controllen) != cmsgLen {
+ return -1, fmt.Errorf("received control message has incorrect length: got %d, wanted %d", ep.msghdr.Controllen, cmsgLen)
+ }
+ if ep.cmsg.Level != syscall.SOL_SOCKET || ep.cmsg.Type != syscall.SCM_RIGHTS {
+ return -1, fmt.Errorf("received control message has incorrect (level, type): got (%v, %v), wanted (%v, %v)", ep.cmsg.Level, ep.cmsg.Type, syscall.SOL_SOCKET, syscall.SCM_RIGHTS)
+ }
+ return int(*ep.cmsgData()), nil
+}
+
+func (ep *Endpoint) cmsgData() *int32 {
+ // syscall.CmsgLen(0) == syscall.cmsgAlignOf(syscall.SizeofCmsghdr)
+ return (*int32)((unsafe.Pointer)(uintptr((unsafe.Pointer)(ep.cmsg)) + uintptr(syscall.CmsgLen(0))))
+}
diff --git a/pkg/fdnotifier/BUILD b/pkg/fdnotifier/BUILD
index 8c8d193cc..d0552c06e 100644
--- a/pkg/fdnotifier/BUILD
+++ b/pkg/fdnotifier/BUILD
@@ -8,7 +8,7 @@ go_library(
"fdnotifier.go",
"poll_unsafe.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/fdnotifier",
+ importpath = "gvisor.dev/gvisor/pkg/fdnotifier",
visibility = ["//:sandbox"],
deps = ["//pkg/waiter"],
)
diff --git a/pkg/fdnotifier/fdnotifier.go b/pkg/fdnotifier/fdnotifier.go
index f0b028b0b..58529f99f 100644
--- a/pkg/fdnotifier/fdnotifier.go
+++ b/pkg/fdnotifier/fdnotifier.go
@@ -25,7 +25,7 @@ import (
"sync"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/waiter"
)
type fdInfo struct {
diff --git a/pkg/fdnotifier/poll_unsafe.go b/pkg/fdnotifier/poll_unsafe.go
index bc5e0ac44..ab8857b5e 100644
--- a/pkg/fdnotifier/poll_unsafe.go
+++ b/pkg/fdnotifier/poll_unsafe.go
@@ -20,7 +20,7 @@ import (
"syscall"
"unsafe"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// NonBlockingPoll polls the given FD in non-blocking fashion. It is used just
diff --git a/pkg/flipcall/BUILD b/pkg/flipcall/BUILD
new file mode 100644
index 000000000..7126fc45f
--- /dev/null
+++ b/pkg/flipcall/BUILD
@@ -0,0 +1,31 @@
+load("//tools/go_stateify:defs.bzl", "go_library", "go_test")
+
+package(licenses = ["notice"])
+
+go_library(
+ name = "flipcall",
+ srcs = [
+ "endpoint_futex.go",
+ "endpoint_unsafe.go",
+ "flipcall.go",
+ "futex_linux.go",
+ "packet_window_allocator.go",
+ ],
+ importpath = "gvisor.dev/gvisor/pkg/flipcall",
+ visibility = ["//visibility:public"],
+ deps = [
+ "//pkg/abi/linux",
+ "//pkg/log",
+ "//pkg/memutil",
+ ],
+)
+
+go_test(
+ name = "flipcall_test",
+ size = "small",
+ srcs = [
+ "flipcall_example_test.go",
+ "flipcall_test.go",
+ ],
+ embed = [":flipcall"],
+)
diff --git a/pkg/flipcall/endpoint_futex.go b/pkg/flipcall/endpoint_futex.go
new file mode 100644
index 000000000..5cab02b1d
--- /dev/null
+++ b/pkg/flipcall/endpoint_futex.go
@@ -0,0 +1,45 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package flipcall
+
+import (
+ "fmt"
+)
+
+type endpointControlState struct{}
+
+func (ep *Endpoint) initControlState(ctrlMode ControlMode) error {
+ if ctrlMode != ControlModeFutex {
+ return fmt.Errorf("unsupported control mode: %v", ctrlMode)
+ }
+ return nil
+}
+
+func (ep *Endpoint) doRoundTrip() error {
+ return ep.doFutexRoundTrip()
+}
+
+func (ep *Endpoint) doWaitFirst() error {
+ return ep.doFutexWaitFirst()
+}
+
+func (ep *Endpoint) doNotifyLast() error {
+ return ep.doFutexNotifyLast()
+}
+
+// Preconditions: ep.isShutdown() == true.
+func (ep *Endpoint) interruptForShutdown() {
+ ep.doFutexInterruptForShutdown()
+}
diff --git a/pkg/flipcall/endpoint_unsafe.go b/pkg/flipcall/endpoint_unsafe.go
new file mode 100644
index 000000000..8319955e0
--- /dev/null
+++ b/pkg/flipcall/endpoint_unsafe.go
@@ -0,0 +1,238 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package flipcall
+
+import (
+ "fmt"
+ "math"
+ "reflect"
+ "sync/atomic"
+ "syscall"
+ "unsafe"
+)
+
+// An Endpoint provides the ability to synchronously transfer data and control
+// to a connected peer Endpoint, which may be in another process.
+//
+// Since the Endpoint control transfer model is synchronous, at any given time
+// one Endpoint "has control" (designated the *active* Endpoint), and the other
+// is "waiting for control" (designated the *inactive* Endpoint). Users of the
+// flipcall package arbitrarily designate one Endpoint as initially-active, and
+// the other as initially-inactive; in a client/server protocol, the client
+// Endpoint is usually initially-active (able to send a request) and the server
+// Endpoint is usually initially-inactive (waiting for a request). The
+// initially-active Endpoint writes data to be sent to Endpoint.Data(), and
+// then synchronously transfers control to the inactive Endpoint by calling
+// Endpoint.SendRecv(), becoming the inactive Endpoint in the process. The
+// initially-inactive Endpoint waits for control by calling
+// Endpoint.RecvFirst(); receiving control causes it to become the active
+// Endpoint. After this, the protocol is symmetric: the active Endpoint reads
+// data sent by the peer by reading from Endpoint.Data(), writes data to be
+// sent to the peer into Endpoint.Data(), and then calls Endpoint.SendRecv() to
+// exchange roles with the peer, which blocks until the peer has done the same.
+type Endpoint struct {
+ // shutdown is non-zero if Endpoint.Shutdown() has been called. shutdown is
+ // accessed using atomic memory operations.
+ shutdown uint32
+
+ // dataCap is the size of the datagram part of the packet window in bytes.
+ // dataCap is immutable.
+ dataCap uint32
+
+ // packet is the beginning of the packet window. packet is immutable.
+ packet unsafe.Pointer
+
+ ctrl endpointControlState
+}
+
+// Init must be called on zero-value Endpoints before first use. If it
+// succeeds, Destroy() must be called once the Endpoint is no longer in use.
+//
+// ctrlMode specifies how connected Endpoints will exchange control. Both
+// connected Endpoints must specify the same value for ctrlMode.
+//
+// pwd represents the packet window used to exchange data with the peer
+// Endpoint. FD may differ between Endpoints if they are in different
+// processes, but must represent the same file. The packet window must
+// initially be filled with zero bytes.
+func (ep *Endpoint) Init(ctrlMode ControlMode, pwd PacketWindowDescriptor) error {
+ if pwd.Length < pageSize {
+ return fmt.Errorf("packet window size (%d) less than minimum (%d)", pwd.Length, pageSize)
+ }
+ if pwd.Length > math.MaxUint32 {
+ return fmt.Errorf("packet window size (%d) exceeds maximum (%d)", pwd.Length, math.MaxUint32)
+ }
+ m, _, e := syscall.Syscall6(syscall.SYS_MMAP, 0, uintptr(pwd.Length), syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_SHARED, uintptr(pwd.FD), uintptr(pwd.Offset))
+ if e != 0 {
+ return fmt.Errorf("failed to mmap packet window: %v", e)
+ }
+ ep.dataCap = uint32(pwd.Length) - uint32(packetHeaderBytes)
+ ep.packet = (unsafe.Pointer)(m)
+ if err := ep.initControlState(ctrlMode); err != nil {
+ ep.unmapPacket()
+ return err
+ }
+ return nil
+}
+
+// NewEndpoint is a convenience function that returns an initialized Endpoint
+// allocated on the heap.
+func NewEndpoint(ctrlMode ControlMode, pwd PacketWindowDescriptor) (*Endpoint, error) {
+ var ep Endpoint
+ if err := ep.Init(ctrlMode, pwd); err != nil {
+ return nil, err
+ }
+ return &ep, nil
+}
+
+func (ep *Endpoint) unmapPacket() {
+ syscall.Syscall(syscall.SYS_MUNMAP, uintptr(ep.packet), uintptr(ep.dataCap)+packetHeaderBytes, 0)
+ ep.dataCap = 0
+ ep.packet = nil
+}
+
+// Destroy releases resources owned by ep. No other Endpoint methods may be
+// called after Destroy.
+func (ep *Endpoint) Destroy() {
+ ep.unmapPacket()
+}
+
+// Packets consist of an 8-byte header followed by an arbitrarily-sized
+// datagram. The header consists of:
+//
+// - A 4-byte native-endian sequence number, which is incremented by the active
+// Endpoint after it finishes writing to the packet window. The sequence number
+// is needed to handle spurious wakeups.
+//
+// - A 4-byte native-endian datagram length in bytes.
+const (
+ sizeofUint32 = unsafe.Sizeof(uint32(0))
+ packetHeaderBytes = 2 * sizeofUint32
+)
+
+func (ep *Endpoint) seq() *uint32 {
+ return (*uint32)(ep.packet)
+}
+
+func (ep *Endpoint) dataLen() *uint32 {
+ return (*uint32)((unsafe.Pointer)(uintptr(ep.packet) + sizeofUint32))
+}
+
+// DataCap returns the maximum datagram size supported by ep in bytes.
+func (ep *Endpoint) DataCap() uint32 {
+ return ep.dataCap
+}
+
+func (ep *Endpoint) data() unsafe.Pointer {
+ return unsafe.Pointer(uintptr(ep.packet) + packetHeaderBytes)
+}
+
+// Data returns the datagram part of ep's packet window as a byte slice.
+//
+// Note that the packet window is shared with the potentially-untrusted peer
+// Endpoint, which may concurrently mutate the contents of the packet window.
+// Thus:
+//
+// - Readers must not assume that two reads of the same byte in Data() will
+// return the same result. In other words, readers should read any given byte
+// in Data() at most once.
+//
+// - Writers must not assume that they will read back the same data that they
+// have written. In other words, writers should avoid reading from Data() at
+// all.
+func (ep *Endpoint) Data() []byte {
+ var bs []byte
+ bsReflect := (*reflect.SliceHeader)((unsafe.Pointer)(&bs))
+ bsReflect.Data = uintptr(ep.data())
+ bsReflect.Len = int(ep.DataCap())
+ bsReflect.Cap = bsReflect.Len
+ return bs
+}
+
+// SendRecv transfers control to the peer Endpoint, causing its call to
+// Endpoint.SendRecv() or Endpoint.RecvFirst() to return with the given
+// datagram length, then blocks until the peer Endpoint calls
+// Endpoint.SendRecv() or Endpoint.SendLast().
+//
+// Preconditions: No previous call to ep.SendRecv() or ep.RecvFirst() has
+// returned an error. ep.SendLast() has never been called.
+func (ep *Endpoint) SendRecv(dataLen uint32) (uint32, error) {
+ dataCap := ep.DataCap()
+ if dataLen > dataCap {
+ return 0, fmt.Errorf("can't send packet with datagram length %d (maximum %d)", dataLen, dataCap)
+ }
+ atomic.StoreUint32(ep.dataLen(), dataLen)
+ if err := ep.doRoundTrip(); err != nil {
+ return 0, err
+ }
+ recvDataLen := atomic.LoadUint32(ep.dataLen())
+ if recvDataLen > dataCap {
+ return 0, fmt.Errorf("received packet with invalid datagram length %d (maximum %d)", recvDataLen, dataCap)
+ }
+ return recvDataLen, nil
+}
+
+// RecvFirst blocks until the peer Endpoint calls Endpoint.SendRecv(), then
+// returns the datagram length specified by that call.
+//
+// Preconditions: ep.SendRecv(), ep.RecvFirst(), and ep.SendLast() have never
+// been called.
+func (ep *Endpoint) RecvFirst() (uint32, error) {
+ if err := ep.doWaitFirst(); err != nil {
+ return 0, err
+ }
+ recvDataLen := atomic.LoadUint32(ep.dataLen())
+ if dataCap := ep.DataCap(); recvDataLen > dataCap {
+ return 0, fmt.Errorf("received packet with invalid datagram length %d (maximum %d)", recvDataLen, dataCap)
+ }
+ return recvDataLen, nil
+}
+
+// SendLast causes the peer Endpoint's call to Endpoint.SendRecv() or
+// Endpoint.RecvFirst() to return with the given datagram length.
+//
+// Preconditions: No previous call to ep.SendRecv() or ep.RecvFirst() has
+// returned an error. ep.SendLast() has never been called.
+func (ep *Endpoint) SendLast(dataLen uint32) error {
+ dataCap := ep.DataCap()
+ if dataLen > dataCap {
+ return fmt.Errorf("can't send packet with datagram length %d (maximum %d)", dataLen, dataCap)
+ }
+ atomic.StoreUint32(ep.dataLen(), dataLen)
+ if err := ep.doNotifyLast(); err != nil {
+ return err
+ }
+ return nil
+}
+
+// Shutdown causes concurrent and future calls to ep.SendRecv(),
+// ep.RecvFirst(), and ep.SendLast() to unblock and return errors. It does not
+// wait for concurrent calls to return.
+func (ep *Endpoint) Shutdown() {
+ if atomic.SwapUint32(&ep.shutdown, 1) == 0 {
+ ep.interruptForShutdown()
+ }
+}
+
+func (ep *Endpoint) isShutdown() bool {
+ return atomic.LoadUint32(&ep.shutdown) != 0
+}
+
+type endpointShutdownError struct{}
+
+// Error implements error.Error.
+func (endpointShutdownError) Error() string {
+ return "Endpoint.Shutdown() has been called"
+}
diff --git a/pkg/flipcall/flipcall.go b/pkg/flipcall/flipcall.go
new file mode 100644
index 000000000..79a1e418a
--- /dev/null
+++ b/pkg/flipcall/flipcall.go
@@ -0,0 +1,32 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package flipcall implements a protocol providing Fast Local Interprocess
+// Procedure Calls.
+package flipcall
+
+// ControlMode defines how control is exchanged across a connection.
+type ControlMode uint8
+
+const (
+ // ControlModeInvalid is invalid, and exists so that ControlMode fields in
+ // structs must be explicitly initialized.
+ ControlModeInvalid ControlMode = iota
+
+ // ControlModeFutex uses shared futex operations on packet control words.
+ ControlModeFutex
+
+ // controlModeCount is the number of ControlModes in this list.
+ controlModeCount
+)
diff --git a/pkg/flipcall/flipcall_example_test.go b/pkg/flipcall/flipcall_example_test.go
new file mode 100644
index 000000000..572a1f119
--- /dev/null
+++ b/pkg/flipcall/flipcall_example_test.go
@@ -0,0 +1,106 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package flipcall
+
+import (
+ "bytes"
+ "fmt"
+)
+
+func Example() {
+ const (
+ reqPrefix = "request "
+ respPrefix = "response "
+ count = 3
+ maxMessageLen = len(respPrefix) + 1 // 1 digit
+ )
+
+ pwa, err := NewPacketWindowAllocator()
+ if err != nil {
+ panic(err)
+ }
+ defer pwa.Destroy()
+ pwd, err := pwa.Allocate(PacketWindowLengthForDataCap(uint32(maxMessageLen)))
+ if err != nil {
+ panic(err)
+ }
+ clientEP, err := NewEndpoint(ControlModeFutex, pwd)
+ if err != nil {
+ panic(err)
+ }
+ defer clientEP.Destroy()
+ serverEP, err := NewEndpoint(ControlModeFutex, pwd)
+ if err != nil {
+ panic(err)
+ }
+ defer serverEP.Destroy()
+
+ serverDone := make(chan struct{})
+ go func() {
+ defer func() { serverDone <- struct{}{} }()
+ i := 0
+ var buf bytes.Buffer
+ // wait for first request
+ n, err := serverEP.RecvFirst()
+ if err != nil {
+ return
+ }
+ for {
+ // read request
+ buf.Reset()
+ buf.Write(serverEP.Data()[:n])
+ fmt.Println(buf.String())
+ // write response
+ buf.Reset()
+ fmt.Fprintf(&buf, "%s%d", respPrefix, i)
+ copy(serverEP.Data(), buf.Bytes())
+ // send response and wait for next request
+ n, err = serverEP.SendRecv(uint32(buf.Len()))
+ if err != nil {
+ return
+ }
+ i++
+ }
+ }()
+ defer func() {
+ serverEP.Shutdown()
+ <-serverDone
+ }()
+
+ var buf bytes.Buffer
+ for i := 0; i < count; i++ {
+ // write request
+ buf.Reset()
+ fmt.Fprintf(&buf, "%s%d", reqPrefix, i)
+ copy(clientEP.Data(), buf.Bytes())
+ // send request and wait for response
+ n, err := clientEP.SendRecv(uint32(buf.Len()))
+ if err != nil {
+ panic(err)
+ }
+ // read response
+ buf.Reset()
+ buf.Write(clientEP.Data()[:n])
+ fmt.Println(buf.String())
+ }
+
+ // Output:
+ // request 0
+ // response 0
+ // request 1
+ // response 1
+ // request 2
+ // response 2
+}
diff --git a/pkg/flipcall/flipcall_test.go b/pkg/flipcall/flipcall_test.go
new file mode 100644
index 000000000..20d3002f0
--- /dev/null
+++ b/pkg/flipcall/flipcall_test.go
@@ -0,0 +1,211 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package flipcall
+
+import (
+ "testing"
+ "time"
+)
+
+var testPacketWindowSize = pageSize
+
+func testSendRecv(t *testing.T, ctrlMode ControlMode) {
+ pwa, err := NewPacketWindowAllocator()
+ if err != nil {
+ t.Fatalf("failed to create PacketWindowAllocator: %v", err)
+ }
+ defer pwa.Destroy()
+ pwd, err := pwa.Allocate(testPacketWindowSize)
+ if err != nil {
+ t.Fatalf("PacketWindowAllocator.Allocate() failed: %v", err)
+ }
+
+ sendEP, err := NewEndpoint(ctrlMode, pwd)
+ if err != nil {
+ t.Fatalf("failed to create Endpoint: %v", err)
+ }
+ defer sendEP.Destroy()
+ recvEP, err := NewEndpoint(ctrlMode, pwd)
+ if err != nil {
+ t.Fatalf("failed to create Endpoint: %v", err)
+ }
+ defer recvEP.Destroy()
+
+ otherThreadDone := make(chan struct{})
+ go func() {
+ defer func() { otherThreadDone <- struct{}{} }()
+ t.Logf("initially-inactive Endpoint waiting for packet 1")
+ if _, err := recvEP.RecvFirst(); err != nil {
+ t.Fatalf("initially-inactive Endpoint.RecvFirst() failed: %v", err)
+ }
+ t.Logf("initially-inactive Endpoint got packet 1, sending packet 2 and waiting for packet 3")
+ if _, err := recvEP.SendRecv(0); err != nil {
+ t.Fatalf("initially-inactive Endpoint.SendRecv() failed: %v", err)
+ }
+ t.Logf("initially-inactive Endpoint got packet 3")
+ }()
+ defer func() {
+ t.Logf("waiting for initially-inactive Endpoint goroutine to complete")
+ <-otherThreadDone
+ }()
+
+ t.Logf("initially-active Endpoint sending packet 1 and waiting for packet 2")
+ if _, err := sendEP.SendRecv(0); err != nil {
+ t.Fatalf("initially-active Endpoint.SendRecv() failed: %v", err)
+ }
+ t.Logf("initially-active Endpoint got packet 2, sending packet 3")
+ if err := sendEP.SendLast(0); err != nil {
+ t.Fatalf("initially-active Endpoint.SendLast() failed: %v", err)
+ }
+}
+
+func TestFutexSendRecv(t *testing.T) {
+ testSendRecv(t, ControlModeFutex)
+}
+
+func testRecvFirstShutdown(t *testing.T, ctrlMode ControlMode) {
+ pwa, err := NewPacketWindowAllocator()
+ if err != nil {
+ t.Fatalf("failed to create PacketWindowAllocator: %v", err)
+ }
+ defer pwa.Destroy()
+ pwd, err := pwa.Allocate(testPacketWindowSize)
+ if err != nil {
+ t.Fatalf("PacketWindowAllocator.Allocate() failed: %v", err)
+ }
+
+ ep, err := NewEndpoint(ctrlMode, pwd)
+ if err != nil {
+ t.Fatalf("failed to create Endpoint: %v", err)
+ }
+ defer ep.Destroy()
+
+ otherThreadDone := make(chan struct{})
+ go func() {
+ defer func() { otherThreadDone <- struct{}{} }()
+ _, err := ep.RecvFirst()
+ if err == nil {
+ t.Errorf("Endpoint.RecvFirst() succeeded unexpectedly")
+ }
+ }()
+
+ time.Sleep(time.Second) // to ensure ep.RecvFirst() has blocked
+ ep.Shutdown()
+ <-otherThreadDone
+}
+
+func TestFutexRecvFirstShutdown(t *testing.T) {
+ testRecvFirstShutdown(t, ControlModeFutex)
+}
+
+func testSendRecvShutdown(t *testing.T, ctrlMode ControlMode) {
+ pwa, err := NewPacketWindowAllocator()
+ if err != nil {
+ t.Fatalf("failed to create PacketWindowAllocator: %v", err)
+ }
+ defer pwa.Destroy()
+ pwd, err := pwa.Allocate(testPacketWindowSize)
+ if err != nil {
+ t.Fatalf("PacketWindowAllocator.Allocate() failed: %v", err)
+ }
+
+ sendEP, err := NewEndpoint(ctrlMode, pwd)
+ if err != nil {
+ t.Fatalf("failed to create Endpoint: %v", err)
+ }
+ defer sendEP.Destroy()
+ recvEP, err := NewEndpoint(ctrlMode, pwd)
+ if err != nil {
+ t.Fatalf("failed to create Endpoint: %v", err)
+ }
+ defer recvEP.Destroy()
+
+ otherThreadDone := make(chan struct{})
+ go func() {
+ defer func() { otherThreadDone <- struct{}{} }()
+ if _, err := recvEP.RecvFirst(); err != nil {
+ t.Fatalf("initially-inactive Endpoint.RecvFirst() failed: %v", err)
+ }
+ if _, err := recvEP.SendRecv(0); err == nil {
+ t.Errorf("initially-inactive Endpoint.SendRecv() succeeded unexpectedly")
+ }
+ }()
+
+ if _, err := sendEP.SendRecv(0); err != nil {
+ t.Fatalf("initially-active Endpoint.SendRecv() failed: %v", err)
+ }
+ time.Sleep(time.Second) // to ensure recvEP.SendRecv() has blocked
+ recvEP.Shutdown()
+ <-otherThreadDone
+}
+
+func TestFutexSendRecvShutdown(t *testing.T) {
+ testSendRecvShutdown(t, ControlModeFutex)
+}
+
+func benchmarkSendRecv(b *testing.B, ctrlMode ControlMode) {
+ pwa, err := NewPacketWindowAllocator()
+ if err != nil {
+ b.Fatalf("failed to create PacketWindowAllocator: %v", err)
+ }
+ defer pwa.Destroy()
+ pwd, err := pwa.Allocate(testPacketWindowSize)
+ if err != nil {
+ b.Fatalf("PacketWindowAllocator.Allocate() failed: %v", err)
+ }
+
+ sendEP, err := NewEndpoint(ctrlMode, pwd)
+ if err != nil {
+ b.Fatalf("failed to create Endpoint: %v", err)
+ }
+ defer sendEP.Destroy()
+ recvEP, err := NewEndpoint(ctrlMode, pwd)
+ if err != nil {
+ b.Fatalf("failed to create Endpoint: %v", err)
+ }
+ defer recvEP.Destroy()
+
+ otherThreadDone := make(chan struct{})
+ go func() {
+ defer func() { otherThreadDone <- struct{}{} }()
+ if b.N == 0 {
+ return
+ }
+ if _, err := recvEP.RecvFirst(); err != nil {
+ b.Fatalf("initially-inactive Endpoint.RecvFirst() failed: %v", err)
+ }
+ for i := 1; i < b.N; i++ {
+ if _, err := recvEP.SendRecv(0); err != nil {
+ b.Fatalf("initially-inactive Endpoint.SendRecv() failed: %v", err)
+ }
+ }
+ if err := recvEP.SendLast(0); err != nil {
+ b.Fatalf("initially-inactive Endpoint.SendLast() failed: %v", err)
+ }
+ }()
+ defer func() { <-otherThreadDone }()
+
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ if _, err := sendEP.SendRecv(0); err != nil {
+ b.Fatalf("initially-active Endpoint.SendRecv() failed: %v", err)
+ }
+ }
+ b.StopTimer()
+}
+
+func BenchmarkFutexSendRecv(b *testing.B) {
+ benchmarkSendRecv(b, ControlModeFutex)
+}
diff --git a/pkg/flipcall/futex_linux.go b/pkg/flipcall/futex_linux.go
new file mode 100644
index 000000000..3f592ad16
--- /dev/null
+++ b/pkg/flipcall/futex_linux.go
@@ -0,0 +1,94 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build linux
+
+package flipcall
+
+import (
+ "fmt"
+ "math"
+ "sync/atomic"
+ "syscall"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/log"
+)
+
+func (ep *Endpoint) doFutexRoundTrip() error {
+ ourSeq, err := ep.doFutexNotifySeq()
+ if err != nil {
+ return err
+ }
+ return ep.doFutexWaitSeq(ourSeq)
+}
+
+func (ep *Endpoint) doFutexWaitFirst() error {
+ return ep.doFutexWaitSeq(0)
+}
+
+func (ep *Endpoint) doFutexNotifyLast() error {
+ _, err := ep.doFutexNotifySeq()
+ return err
+}
+
+func (ep *Endpoint) doFutexNotifySeq() (uint32, error) {
+ ourSeq := atomic.AddUint32(ep.seq(), 1)
+ if err := ep.futexWake(1); err != nil {
+ return ourSeq, fmt.Errorf("failed to FUTEX_WAKE peer Endpoint: %v", err)
+ }
+ return ourSeq, nil
+}
+
+func (ep *Endpoint) doFutexWaitSeq(prevSeq uint32) error {
+ nextSeq := prevSeq + 1
+ for {
+ if ep.isShutdown() {
+ return endpointShutdownError{}
+ }
+ if err := ep.futexWait(prevSeq); err != nil {
+ return fmt.Errorf("failed to FUTEX_WAIT for peer Endpoint: %v", err)
+ }
+ seq := atomic.LoadUint32(ep.seq())
+ if seq == nextSeq {
+ return nil
+ }
+ if seq != prevSeq {
+ return fmt.Errorf("invalid packet sequence number %d (expected %d or %d)", seq, prevSeq, nextSeq)
+ }
+ }
+}
+
+func (ep *Endpoint) doFutexInterruptForShutdown() {
+ // Wake MaxInt32 threads to prevent a malicious or broken peer from
+ // swallowing our wakeup by FUTEX_WAITing from multiple threads.
+ if err := ep.futexWake(math.MaxInt32); err != nil {
+ log.Warningf("failed to FUTEX_WAKE Endpoint: %v", err)
+ }
+}
+
+func (ep *Endpoint) futexWake(numThreads int32) error {
+ if _, _, e := syscall.RawSyscall(syscall.SYS_FUTEX, uintptr(ep.packet), linux.FUTEX_WAKE, uintptr(numThreads)); e != 0 {
+ return e
+ }
+ return nil
+}
+
+func (ep *Endpoint) futexWait(seq uint32) error {
+ _, _, e := syscall.Syscall6(syscall.SYS_FUTEX, uintptr(ep.packet), linux.FUTEX_WAIT, uintptr(seq), 0, 0, 0)
+ if e != 0 && e != syscall.EAGAIN && e != syscall.EINTR {
+ return e
+ }
+ return nil
+}
diff --git a/pkg/flipcall/packet_window_allocator.go b/pkg/flipcall/packet_window_allocator.go
new file mode 100644
index 000000000..7b455b24d
--- /dev/null
+++ b/pkg/flipcall/packet_window_allocator.go
@@ -0,0 +1,166 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package flipcall
+
+import (
+ "fmt"
+ "math/bits"
+ "os"
+ "syscall"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/memutil"
+)
+
+var (
+ pageSize = os.Getpagesize()
+ pageMask = pageSize - 1
+)
+
+func init() {
+ if bits.OnesCount(uint(pageSize)) != 1 {
+ // This is depended on by roundUpToPage().
+ panic(fmt.Sprintf("system page size (%d) is not a power of 2", pageSize))
+ }
+ if uintptr(pageSize) < packetHeaderBytes {
+ // This is required since Endpoint.Init() imposes a minimum packet
+ // window size of 1 page.
+ panic(fmt.Sprintf("system page size (%d) is less than packet header size (%d)", pageSize, packetHeaderBytes))
+ }
+}
+
+// PacketWindowDescriptor represents a packet window, a range of pages in a
+// shared memory file that is used to exchange packets between partner
+// Endpoints.
+type PacketWindowDescriptor struct {
+ // FD is the file descriptor representing the shared memory file.
+ FD int
+
+ // Offset is the offset into the shared memory file at which the packet
+ // window begins.
+ Offset int64
+
+ // Length is the size of the packet window in bytes.
+ Length int
+}
+
+// PacketWindowLengthForDataCap returns the minimum packet window size required
+// to accommodate datagrams of the given size in bytes.
+func PacketWindowLengthForDataCap(dataCap uint32) int {
+ return roundUpToPage(int(dataCap) + int(packetHeaderBytes))
+}
+
+func roundUpToPage(x int) int {
+ return (x + pageMask) &^ pageMask
+}
+
+// A PacketWindowAllocator owns a shared memory file, and allocates packet
+// windows from it.
+type PacketWindowAllocator struct {
+ fd int
+ nextAlloc int64
+ fileSize int64
+}
+
+// Init must be called on zero-value PacketWindowAllocators before first use.
+// If it succeeds, Destroy() must be called once the PacketWindowAllocator is
+// no longer in use.
+func (pwa *PacketWindowAllocator) Init() error {
+ fd, err := memutil.CreateMemFD("flipcall_packet_windows", linux.MFD_CLOEXEC|linux.MFD_ALLOW_SEALING)
+ if err != nil {
+ return fmt.Errorf("failed to create memfd: %v", err)
+ }
+ // Apply F_SEAL_SHRINK to prevent either party from causing SIGBUS in the
+ // other by truncating the file, and F_SEAL_SEAL to prevent either party
+ // from applying F_SEAL_GROW or F_SEAL_WRITE.
+ if _, _, e := syscall.RawSyscall(syscall.SYS_FCNTL, uintptr(fd), linux.F_ADD_SEALS, linux.F_SEAL_SHRINK|linux.F_SEAL_SEAL); e != 0 {
+ syscall.Close(fd)
+ return fmt.Errorf("failed to apply memfd seals: %v", e)
+ }
+ pwa.fd = fd
+ return nil
+}
+
+// NewPacketWindowAllocator is a convenience function that returns an
+// initialized PacketWindowAllocator allocated on the heap.
+func NewPacketWindowAllocator() (*PacketWindowAllocator, error) {
+ var pwa PacketWindowAllocator
+ if err := pwa.Init(); err != nil {
+ return nil, err
+ }
+ return &pwa, nil
+}
+
+// Destroy releases resources owned by pwa. This invalidates file descriptors
+// previously returned by pwa.FD() and pwd.Allocate().
+func (pwa *PacketWindowAllocator) Destroy() {
+ syscall.Close(pwa.fd)
+}
+
+// FD represents the file descriptor of the shared memory file backing pwa.
+func (pwa *PacketWindowAllocator) FD() int {
+ return pwa.fd
+}
+
+// Allocate allocates a new packet window of at least the given size and
+// returns a PacketWindowDescriptor representing it.
+//
+// Preconditions: size > 0.
+func (pwa *PacketWindowAllocator) Allocate(size int) (PacketWindowDescriptor, error) {
+ if size <= 0 {
+ return PacketWindowDescriptor{}, fmt.Errorf("invalid size: %d", size)
+ }
+ // Page-align size to ensure that pwa.nextAlloc remains page-aligned.
+ size = roundUpToPage(size)
+ if size <= 0 {
+ return PacketWindowDescriptor{}, fmt.Errorf("size %d overflows after rounding up to page size", size)
+ }
+ end := pwa.nextAlloc + int64(size) // overflow checked by ensureFileSize
+ if err := pwa.ensureFileSize(end); err != nil {
+ return PacketWindowDescriptor{}, err
+ }
+ start := pwa.nextAlloc
+ pwa.nextAlloc = end
+ return PacketWindowDescriptor{
+ FD: pwa.fd,
+ Offset: start,
+ Length: size,
+ }, nil
+}
+
+func (pwa *PacketWindowAllocator) ensureFileSize(min int64) error {
+ if min <= 0 {
+ return fmt.Errorf("file size would overflow")
+ }
+ if pwa.fileSize >= min {
+ return nil
+ }
+ newSize := 2 * pwa.fileSize
+ if newSize == 0 {
+ newSize = int64(pageSize)
+ }
+ for newSize < min {
+ newNewSize := newSize * 2
+ if newNewSize <= 0 {
+ return fmt.Errorf("file size would overflow")
+ }
+ newSize = newNewSize
+ }
+ if err := syscall.Ftruncate(pwa.fd, newSize); err != nil {
+ return fmt.Errorf("ftruncate failed: %v", err)
+ }
+ pwa.fileSize = newSize
+ return nil
+}
diff --git a/pkg/gate/BUILD b/pkg/gate/BUILD
index 83679f2da..e6a8dbd02 100644
--- a/pkg/gate/BUILD
+++ b/pkg/gate/BUILD
@@ -7,7 +7,7 @@ go_library(
srcs = [
"gate.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/gate",
+ importpath = "gvisor.dev/gvisor/pkg/gate",
visibility = ["//visibility:public"],
)
diff --git a/pkg/gate/gate_test.go b/pkg/gate/gate_test.go
index 7467e7d07..5dbd8d712 100644
--- a/pkg/gate/gate_test.go
+++ b/pkg/gate/gate_test.go
@@ -19,7 +19,7 @@ import (
"testing"
"time"
- "gvisor.googlesource.com/gvisor/pkg/gate"
+ "gvisor.dev/gvisor/pkg/gate"
)
func TestBasicEnter(t *testing.T) {
diff --git a/pkg/ilist/BUILD b/pkg/ilist/BUILD
index dbd65ab12..8f3defa25 100644
--- a/pkg/ilist/BUILD
+++ b/pkg/ilist/BUILD
@@ -8,7 +8,7 @@ go_library(
srcs = [
"interface_list.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/ilist",
+ importpath = "gvisor.dev/gvisor/pkg/ilist",
visibility = ["//visibility:public"],
)
diff --git a/pkg/linewriter/BUILD b/pkg/linewriter/BUILD
index d1aa2e7d6..c8e923a74 100644
--- a/pkg/linewriter/BUILD
+++ b/pkg/linewriter/BUILD
@@ -5,7 +5,7 @@ package(licenses = ["notice"])
go_library(
name = "linewriter",
srcs = ["linewriter.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/linewriter",
+ importpath = "gvisor.dev/gvisor/pkg/linewriter",
visibility = ["//visibility:public"],
)
diff --git a/pkg/log/BUILD b/pkg/log/BUILD
index b2d18eddb..12615240c 100644
--- a/pkg/log/BUILD
+++ b/pkg/log/BUILD
@@ -11,7 +11,7 @@ go_library(
"json_k8s.go",
"log.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/log",
+ importpath = "gvisor.dev/gvisor/pkg/log",
visibility = [
"//visibility:public",
],
diff --git a/pkg/log/log.go b/pkg/log/log.go
index 7d563241e..9387586e6 100644
--- a/pkg/log/log.go
+++ b/pkg/log/log.go
@@ -30,7 +30,7 @@ import (
"syscall"
"time"
- "gvisor.googlesource.com/gvisor/pkg/linewriter"
+ "gvisor.dev/gvisor/pkg/linewriter"
)
// Level is the log level.
@@ -50,6 +50,19 @@ const (
Debug
)
+func (l Level) String() string {
+ switch l {
+ case Warning:
+ return "Warning"
+ case Info:
+ return "Info"
+ case Debug:
+ return "Debug"
+ default:
+ return fmt.Sprintf("Invalid level: %d", l)
+ }
+}
+
// Emitter is the final destination for logs.
type Emitter interface {
// Emit emits the given log statement. This allows for control over the
@@ -209,7 +222,7 @@ var logMu sync.Mutex
// log is the default logger.
var log atomic.Value
-// Log retieves the global logger.
+// Log retrieves the global logger.
func Log() *BasicLogger {
return log.Load().(*BasicLogger)
}
diff --git a/pkg/memutil/BUILD b/pkg/memutil/BUILD
index 71b48a972..7b50e2b28 100644
--- a/pkg/memutil/BUILD
+++ b/pkg/memutil/BUILD
@@ -5,7 +5,7 @@ package(licenses = ["notice"])
go_library(
name = "memutil",
srcs = ["memutil_unsafe.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/memutil",
+ importpath = "gvisor.dev/gvisor/pkg/memutil",
visibility = ["//visibility:public"],
deps = ["@org_golang_x_sys//unix:go_default_library"],
)
diff --git a/pkg/metric/BUILD b/pkg/metric/BUILD
index 4b2c7a00e..3b8a691f4 100644
--- a/pkg/metric/BUILD
+++ b/pkg/metric/BUILD
@@ -6,7 +6,7 @@ package(licenses = ["notice"])
go_library(
name = "metric",
srcs = ["metric.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/metric",
+ importpath = "gvisor.dev/gvisor/pkg/metric",
visibility = ["//:sandbox"],
deps = [
":metric_go_proto",
@@ -23,7 +23,7 @@ proto_library(
go_proto_library(
name = "metric_go_proto",
- importpath = "gvisor.googlesource.com/gvisor/pkg/metric/metric_go_proto",
+ importpath = "gvisor.dev/gvisor/pkg/metric/metric_go_proto",
proto = ":metric_proto",
visibility = ["//:sandbox"],
)
diff --git a/pkg/metric/metric.go b/pkg/metric/metric.go
index 803709cc4..eadde06e4 100644
--- a/pkg/metric/metric.go
+++ b/pkg/metric/metric.go
@@ -21,9 +21,9 @@ import (
"sync"
"sync/atomic"
- "gvisor.googlesource.com/gvisor/pkg/eventchannel"
- "gvisor.googlesource.com/gvisor/pkg/log"
- pb "gvisor.googlesource.com/gvisor/pkg/metric/metric_go_proto"
+ "gvisor.dev/gvisor/pkg/eventchannel"
+ "gvisor.dev/gvisor/pkg/log"
+ pb "gvisor.dev/gvisor/pkg/metric/metric_go_proto"
)
var (
diff --git a/pkg/metric/metric_test.go b/pkg/metric/metric_test.go
index b8b124c83..34969385a 100644
--- a/pkg/metric/metric_test.go
+++ b/pkg/metric/metric_test.go
@@ -18,8 +18,8 @@ import (
"testing"
"github.com/golang/protobuf/proto"
- "gvisor.googlesource.com/gvisor/pkg/eventchannel"
- pb "gvisor.googlesource.com/gvisor/pkg/metric/metric_go_proto"
+ "gvisor.dev/gvisor/pkg/eventchannel"
+ pb "gvisor.dev/gvisor/pkg/metric/metric_go_proto"
)
// sliceEmitter implements eventchannel.Emitter by appending all messages to a
diff --git a/pkg/p9/BUILD b/pkg/p9/BUILD
index 36b2ec5f6..c6737bf97 100644
--- a/pkg/p9/BUILD
+++ b/pkg/p9/BUILD
@@ -21,7 +21,7 @@ go_library(
"transport.go",
"version.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/p9",
+ importpath = "gvisor.dev/gvisor/pkg/p9",
deps = [
"//pkg/fd",
"//pkg/log",
diff --git a/pkg/p9/client.go b/pkg/p9/client.go
index 56587e2cf..7dc20aeef 100644
--- a/pkg/p9/client.go
+++ b/pkg/p9/client.go
@@ -20,8 +20,8 @@ import (
"sync"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/unet"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/unet"
)
// ErrOutOfTags indicates no tags are available.
diff --git a/pkg/p9/client_file.go b/pkg/p9/client_file.go
index 258080f67..a6cc0617e 100644
--- a/pkg/p9/client_file.go
+++ b/pkg/p9/client_file.go
@@ -21,8 +21,8 @@ import (
"sync/atomic"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/fd"
- "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/fd"
+ "gvisor.dev/gvisor/pkg/log"
)
// Attach attaches to a server.
diff --git a/pkg/p9/client_test.go b/pkg/p9/client_test.go
index fc49729d8..87b2dd61e 100644
--- a/pkg/p9/client_test.go
+++ b/pkg/p9/client_test.go
@@ -18,7 +18,7 @@ import (
"syscall"
"testing"
- "gvisor.googlesource.com/gvisor/pkg/unet"
+ "gvisor.dev/gvisor/pkg/unet"
)
// TestVersion tests the version negotiation.
diff --git a/pkg/p9/file.go b/pkg/p9/file.go
index a456e8b3d..907445e15 100644
--- a/pkg/p9/file.go
+++ b/pkg/p9/file.go
@@ -17,7 +17,7 @@ package p9
import (
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/fd"
+ "gvisor.dev/gvisor/pkg/fd"
)
// Attacher is provided by the server.
diff --git a/pkg/p9/handlers.go b/pkg/p9/handlers.go
index f32368763..999b4f684 100644
--- a/pkg/p9/handlers.go
+++ b/pkg/p9/handlers.go
@@ -23,8 +23,8 @@ import (
"sync/atomic"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/fd"
- "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/fd"
+ "gvisor.dev/gvisor/pkg/log"
)
// ExtractErrno extracts a syscall.Errno from a error, best effort.
@@ -224,7 +224,7 @@ func (t *Tattach) handle(cs *connState) message {
file: sf,
refs: 1,
mode: attr.Mode.FileType(),
- pathNode: &cs.server.pathTree,
+ pathNode: cs.server.pathTree,
}
defer root.DecRef()
@@ -545,15 +545,15 @@ func (t *Tunlinkat) handle(cs *connState) message {
// Before we do the unlink itself, we need to ensure that there
// are no operations in flight on associated path node. The
// child's path node lock must be held to ensure that the
- // unlink at marking the child deleted below is atomic with
+ // unlinkat marking the child deleted below is atomic with
// respect to any other read or write operations.
//
// This is one case where we have a lock ordering issue, but
// since we always acquire deeper in the hierarchy, we know
// that we are free of lock cycles.
childPathNode := ref.pathNode.pathNodeFor(t.Name)
- childPathNode.mu.Lock()
- defer childPathNode.mu.Unlock()
+ childPathNode.opMu.Lock()
+ defer childPathNode.opMu.Unlock()
// Do the unlink.
err = ref.file.UnlinkAt(t.Name, t.Flags)
diff --git a/pkg/p9/local_server/BUILD b/pkg/p9/local_server/BUILD
deleted file mode 100644
index aa6db186c..000000000
--- a/pkg/p9/local_server/BUILD
+++ /dev/null
@@ -1,14 +0,0 @@
-load("@io_bazel_rules_go//go:def.bzl", "go_binary")
-
-package(licenses = ["notice"])
-
-go_binary(
- name = "local_server",
- srcs = ["local_server.go"],
- deps = [
- "//pkg/fd",
- "//pkg/log",
- "//pkg/p9",
- "//pkg/unet",
- ],
-)
diff --git a/pkg/p9/local_server/local_server.go b/pkg/p9/local_server/local_server.go
deleted file mode 100644
index 9546b3de5..000000000
--- a/pkg/p9/local_server/local_server.go
+++ /dev/null
@@ -1,353 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Binary local_server provides a local 9P2000.L server for the p9 package.
-//
-// To use, first start the server:
-// local_server /tmp/my_bind_addr
-//
-// Then, connect using the Linux 9P filesystem:
-// mount -t 9p -o trans=unix /tmp/my_bind_addr /mnt
-//
-// This package also serves as an examplar.
-package main
-
-import (
- "os"
- "path"
- "syscall"
-
- "gvisor.googlesource.com/gvisor/pkg/fd"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/p9"
- "gvisor.googlesource.com/gvisor/pkg/unet"
-)
-
-// local wraps a local file.
-type local struct {
- p9.DefaultWalkGetAttr
-
- path string
- file *os.File
-}
-
-// info constructs a QID for this file.
-func (l *local) info() (p9.QID, os.FileInfo, error) {
- var (
- qid p9.QID
- fi os.FileInfo
- err error
- )
-
- // Stat the file.
- if l.file != nil {
- fi, err = l.file.Stat()
- } else {
- fi, err = os.Lstat(l.path)
- }
- if err != nil {
- log.Warningf("error stating %#v: %v", l, err)
- return qid, nil, err
- }
-
- // Construct the QID type.
- qid.Type = p9.ModeFromOS(fi.Mode()).QIDType()
-
- // Save the path from the Ino.
- qid.Path = fi.Sys().(*syscall.Stat_t).Ino
- return qid, fi, nil
-}
-
-// Attach implements p9.Attacher.Attach.
-func (l *local) Attach() (p9.File, error) {
- return &local{path: "/"}, nil
-}
-
-// Walk implements p9.File.Walk.
-func (l *local) Walk(names []string) ([]p9.QID, p9.File, error) {
- var qids []p9.QID
- last := &local{path: l.path}
- for _, name := range names {
- c := &local{path: path.Join(last.path, name)}
- qid, _, err := c.info()
- if err != nil {
- return nil, nil, err
- }
- qids = append(qids, qid)
- last = c
- }
- return qids, last, nil
-}
-
-// StatFS implements p9.File.StatFS.
-//
-// Not implemented.
-func (l *local) StatFS() (p9.FSStat, error) {
- return p9.FSStat{}, syscall.ENOSYS
-}
-
-// FSync implements p9.File.FSync.
-func (l *local) FSync() error {
- return l.file.Sync()
-}
-
-// GetAttr implements p9.File.GetAttr.
-//
-// Not fully implemented.
-func (l *local) GetAttr(req p9.AttrMask) (p9.QID, p9.AttrMask, p9.Attr, error) {
- qid, fi, err := l.info()
- if err != nil {
- return qid, p9.AttrMask{}, p9.Attr{}, err
- }
-
- stat := fi.Sys().(*syscall.Stat_t)
- attr := p9.Attr{
- Mode: p9.FileMode(stat.Mode),
- UID: p9.UID(stat.Uid),
- GID: p9.GID(stat.Gid),
- NLink: stat.Nlink,
- RDev: stat.Rdev,
- Size: uint64(stat.Size),
- BlockSize: uint64(stat.Blksize),
- Blocks: uint64(stat.Blocks),
- ATimeSeconds: uint64(stat.Atim.Sec),
- ATimeNanoSeconds: uint64(stat.Atim.Nsec),
- MTimeSeconds: uint64(stat.Mtim.Sec),
- MTimeNanoSeconds: uint64(stat.Mtim.Nsec),
- CTimeSeconds: uint64(stat.Ctim.Sec),
- CTimeNanoSeconds: uint64(stat.Ctim.Nsec),
- }
- valid := p9.AttrMask{
- Mode: true,
- UID: true,
- GID: true,
- NLink: true,
- RDev: true,
- Size: true,
- Blocks: true,
- ATime: true,
- MTime: true,
- CTime: true,
- }
-
- return qid, valid, attr, nil
-}
-
-// SetAttr implements p9.File.SetAttr.
-//
-// Not implemented.
-func (l *local) SetAttr(valid p9.SetAttrMask, attr p9.SetAttr) error {
- return syscall.ENOSYS
-}
-
-// Remove implements p9.File.Remove.
-//
-// Not implemented.
-func (l *local) Remove() error {
- return syscall.ENOSYS
-}
-
-// Rename implements p9.File.Rename.
-//
-// Not implemented.
-func (l *local) Rename(directory p9.File, name string) error {
- return syscall.ENOSYS
-}
-
-// Close implements p9.File.Close.
-func (l *local) Close() error {
- if l.file != nil {
- return l.file.Close()
- }
- return nil
-}
-
-// Open implements p9.File.Open.
-func (l *local) Open(mode p9.OpenFlags) (*fd.FD, p9.QID, uint32, error) {
- qid, _, err := l.info()
- if err != nil {
- return nil, qid, 0, err
- }
-
- // Do the actual open.
- f, err := os.OpenFile(l.path, int(mode), 0)
- if err != nil {
- return nil, qid, 0, err
- }
- l.file = f
-
- // Note: we don't send the local file for this server.
- return nil, qid, 4096, nil
-}
-
-// Read implements p9.File.Read.
-func (l *local) ReadAt(p []byte, offset uint64) (int, error) {
- return l.file.ReadAt(p, int64(offset))
-}
-
-// Write implements p9.File.Write.
-func (l *local) WriteAt(p []byte, offset uint64) (int, error) {
- return l.file.WriteAt(p, int64(offset))
-}
-
-// Create implements p9.File.Create.
-func (l *local) Create(name string, mode p9.OpenFlags, permissions p9.FileMode, _ p9.UID, _ p9.GID) (*fd.FD, p9.File, p9.QID, uint32, error) {
- f, err := os.OpenFile(l.path, int(mode)|syscall.O_CREAT|syscall.O_EXCL, os.FileMode(permissions))
- if err != nil {
- return nil, nil, p9.QID{}, 0, err
- }
-
- l2 := &local{path: path.Join(l.path, name), file: f}
- qid, _, err := l2.info()
- if err != nil {
- l2.Close()
- return nil, nil, p9.QID{}, 0, err
- }
-
- return nil, l2, qid, 4096, nil
-}
-
-// Mkdir implements p9.File.Mkdir.
-//
-// Not properly implemented.
-func (l *local) Mkdir(name string, permissions p9.FileMode, _ p9.UID, _ p9.GID) (p9.QID, error) {
- if err := os.Mkdir(path.Join(l.path, name), os.FileMode(permissions)); err != nil {
- return p9.QID{}, err
- }
-
- // Blank QID.
- return p9.QID{}, nil
-}
-
-// Symlink implements p9.File.Symlink.
-//
-// Not properly implemented.
-func (l *local) Symlink(oldname string, newname string, _ p9.UID, _ p9.GID) (p9.QID, error) {
- if err := os.Symlink(oldname, path.Join(l.path, newname)); err != nil {
- return p9.QID{}, err
- }
-
- // Blank QID.
- return p9.QID{}, nil
-}
-
-// Link implements p9.File.Link.
-//
-// Not properly implemented.
-func (l *local) Link(target p9.File, newname string) error {
- return os.Link(target.(*local).path, path.Join(l.path, newname))
-}
-
-// Mknod implements p9.File.Mknod.
-//
-// Not implemented.
-func (l *local) Mknod(name string, mode p9.FileMode, major uint32, minor uint32, _ p9.UID, _ p9.GID) (p9.QID, error) {
- return p9.QID{}, syscall.ENOSYS
-}
-
-// RenameAt implements p9.File.RenameAt.
-//
-// Not implemented.
-func (l *local) RenameAt(oldname string, newdir p9.File, newname string) error {
- return syscall.ENOSYS
-}
-
-// UnlinkAt implements p9.File.UnlinkAt.
-//
-// Not implemented.
-func (l *local) UnlinkAt(name string, flags uint32) error {
- return syscall.ENOSYS
-}
-
-// Readdir implements p9.File.Readdir.
-func (l *local) Readdir(offset uint64, count uint32) ([]p9.Dirent, error) {
- // We only do *all* dirents in single shot.
- const maxDirentBuffer = 1024 * 1024
- buf := make([]byte, maxDirentBuffer)
- n, err := syscall.ReadDirent(int(l.file.Fd()), buf)
- if err != nil {
- // Return zero entries.
- return nil, nil
- }
-
- // Parse the entries; note that we read up to offset+count here.
- _, newCount, newNames := syscall.ParseDirent(buf[:n], int(offset)+int(count), nil)
- var dirents []p9.Dirent
- for i := int(offset); i >= 0 && i < newCount; i++ {
- entry := local{path: path.Join(l.path, newNames[i])}
- qid, _, err := entry.info()
- if err != nil {
- continue
- }
- dirents = append(dirents, p9.Dirent{
- QID: qid,
- Type: qid.Type,
- Name: newNames[i],
- Offset: uint64(i + 1),
- })
- }
-
- return dirents, nil
-}
-
-// Readlink implements p9.File.Readlink.
-//
-// Not properly implemented.
-func (l *local) Readlink() (string, error) {
- return os.Readlink(l.path)
-}
-
-// Flush implements p9.File.Flush.
-func (l *local) Flush() error {
- return nil
-}
-
-// Connect implements p9.File.Connect.
-func (l *local) Connect(p9.ConnectFlags) (*fd.FD, error) {
- return nil, syscall.ECONNREFUSED
-}
-
-// Renamed implements p9.File.Renamed.
-func (l *local) Renamed(parent p9.File, newName string) {
- l.path = path.Join(parent.(*local).path, newName)
-}
-
-// Allocate implements p9.File.Allocate.
-func (l *local) Allocate(mode p9.AllocateMode, offset, length uint64) error {
- return syscall.Fallocate(int(l.file.Fd()), mode.ToLinux(), int64(offset), int64(length))
-}
-
-func main() {
- log.SetLevel(log.Debug)
-
- if len(os.Args) != 2 {
- log.Warningf("usage: %s <bind-addr>", os.Args[0])
- os.Exit(1)
- }
-
- // Bind and listen on the socket.
- serverSocket, err := unet.BindAndListen(os.Args[1], false)
- if err != nil {
- log.Warningf("err binding: %v", err)
- os.Exit(1)
- }
-
- // Run the server.
- s := p9.NewServer(&local{})
- s.Serve(serverSocket)
-}
-
-var (
- _ p9.File = &local{}
-)
diff --git a/pkg/p9/messages.go b/pkg/p9/messages.go
index 75d6bc832..fd9eb1c5d 100644
--- a/pkg/p9/messages.go
+++ b/pkg/p9/messages.go
@@ -18,7 +18,7 @@ import (
"fmt"
"math"
- "gvisor.googlesource.com/gvisor/pkg/fd"
+ "gvisor.dev/gvisor/pkg/fd"
)
// ErrInvalidMsgType is returned when an unsupported message type is found.
diff --git a/pkg/p9/p9.go b/pkg/p9/p9.go
index 4039862e6..e12831dbd 100644
--- a/pkg/p9/p9.go
+++ b/pkg/p9/p9.go
@@ -92,7 +92,7 @@ func (o OpenFlags) String() string {
}
}
-// Tag is a messsage tag.
+// Tag is a message tag.
type Tag uint16
// FID is a file identifier.
diff --git a/pkg/p9/p9test/BUILD b/pkg/p9/p9test/BUILD
index cf22edde8..6e939a49a 100644
--- a/pkg/p9/p9test/BUILD
+++ b/pkg/p9/p9test/BUILD
@@ -8,7 +8,7 @@ alias(
actual = "@com_github_golang_mock//mockgen:mockgen",
)
-MOCK_SRC_PACKAGE = "gvisor.googlesource.com/gvisor/pkg/p9"
+MOCK_SRC_PACKAGE = "gvisor.dev/gvisor/pkg/p9"
# mockgen_reflect is a source file that contains mock generation code that
# imports the p9 package and generates a specification via reflection. The
@@ -64,7 +64,7 @@ go_library(
"mocks.go",
"p9test.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/p9/p9test",
+ importpath = "gvisor.dev/gvisor/pkg/p9/p9test",
visibility = ["//:sandbox"],
deps = [
"//pkg/fd",
diff --git a/pkg/p9/p9test/client_test.go b/pkg/p9/p9test/client_test.go
index e00dd03ab..fe649c2e8 100644
--- a/pkg/p9/p9test/client_test.go
+++ b/pkg/p9/p9test/client_test.go
@@ -28,8 +28,8 @@ import (
"time"
"github.com/golang/mock/gomock"
- "gvisor.googlesource.com/gvisor/pkg/fd"
- "gvisor.googlesource.com/gvisor/pkg/p9"
+ "gvisor.dev/gvisor/pkg/fd"
+ "gvisor.dev/gvisor/pkg/p9"
)
func TestPanic(t *testing.T) {
@@ -269,14 +269,14 @@ type fileGenerator func(*Harness, string, p9.File) (*Mock, *Mock, p9.File)
func walkHelper(h *Harness, name string, dir p9.File) (parentBackend *Mock, walkedBackend *Mock, walked p9.File) {
_, parent, err := dir.Walk(nil)
if err != nil {
- h.t.Fatalf("got walk err %v, want nil", err)
+ h.t.Fatalf("Walk(nil) got err %v, want nil", err)
}
defer parent.Close()
parentBackend = h.Pop(parent)
_, walked, err = parent.Walk([]string{name})
if err != nil {
- h.t.Fatalf("got walk err %v, want nil", err)
+ h.t.Fatalf("Walk(%s) got err %v, want nil", name, err)
}
walkedBackend = h.Pop(walked)
@@ -389,7 +389,7 @@ func checkDeleted(h *Harness, file p9.File) {
_, newFile, err := file.Walk(nil)
if err == syscall.EBUSY {
// We can't walk from here because this reference is open
- // aleady. Okay, we will also have unopened cases through
+ // already. Okay, we will also have unopened cases through
// TestUnlink, just skip the remove operation for now.
return
} else if err != nil {
@@ -854,6 +854,62 @@ func TestRenameAtInvalid(t *testing.T) {
}
}
+// TestRenameSecondOrder tests that indirect rename targets continue to receive
+// Renamed calls after a rename of its renamed parent. i.e.,
+//
+// 1. Create /one/file
+// 2. Create /directory
+// 3. Rename /one -> /directory/one
+// 4. Rename /directory -> /three/foo
+// 5. file from (1) should still receive Renamed.
+//
+// This is a regression test for b/135219260.
+func TestRenameSecondOrder(t *testing.T) {
+ h, c := NewHarness(t)
+ defer h.Finish()
+
+ rootBackend, root := newRoot(h, c)
+ defer root.Close()
+
+ // Walk to /one.
+ _, oneBackend, oneFile := walkHelper(h, "one", root)
+ defer oneFile.Close()
+
+ // Walk to and generate /one/file.
+ //
+ // walkHelper re-walks to oneFile, so we need the second backend,
+ // which will also receive Renamed calls.
+ oneSecondBackend, fileBackend, fileFile := walkHelper(h, "file", oneFile)
+ defer fileFile.Close()
+
+ // Walk to and generate /directory.
+ _, directoryBackend, directoryFile := walkHelper(h, "directory", root)
+ defer directoryFile.Close()
+
+ // Rename /one to /directory/one.
+ rootBackend.EXPECT().RenameAt("one", directoryBackend, "one").Return(nil)
+ expectRenamed(oneBackend, []string{}, directoryBackend, "one")
+ expectRenamed(oneSecondBackend, []string{}, directoryBackend, "one")
+ expectRenamed(fileBackend, []string{}, oneBackend, "file")
+ if err := renameAt(h, root, directoryFile, "one", "one", false); err != nil {
+ h.t.Fatalf("got rename err %v, want nil", err)
+ }
+
+ // Walk to /three.
+ _, threeBackend, threeFile := walkHelper(h, "three", root)
+ defer threeFile.Close()
+
+ // Rename /directory to /three/foo.
+ rootBackend.EXPECT().RenameAt("directory", threeBackend, "foo").Return(nil)
+ expectRenamed(directoryBackend, []string{}, threeBackend, "foo")
+ expectRenamed(oneBackend, []string{}, directoryBackend, "one")
+ expectRenamed(oneSecondBackend, []string{}, directoryBackend, "one")
+ expectRenamed(fileBackend, []string{}, oneBackend, "file")
+ if err := renameAt(h, root, threeFile, "directory", "foo", false); err != nil {
+ h.t.Fatalf("got rename err %v, want nil", err)
+ }
+}
+
func TestReadlink(t *testing.T) {
for name := range newTypeMap(nil) {
t.Run(name, func(t *testing.T) {
diff --git a/pkg/p9/p9test/p9test.go b/pkg/p9/p9test/p9test.go
index 1c8eff200..95846e5f7 100644
--- a/pkg/p9/p9test/p9test.go
+++ b/pkg/p9/p9test/p9test.go
@@ -23,8 +23,8 @@ import (
"testing"
"github.com/golang/mock/gomock"
- "gvisor.googlesource.com/gvisor/pkg/p9"
- "gvisor.googlesource.com/gvisor/pkg/unet"
+ "gvisor.dev/gvisor/pkg/p9"
+ "gvisor.dev/gvisor/pkg/unet"
)
// Harness is an attacher mock.
diff --git a/pkg/p9/path_tree.go b/pkg/p9/path_tree.go
index f37ad4ab2..865459411 100644
--- a/pkg/p9/path_tree.go
+++ b/pkg/p9/path_tree.go
@@ -23,87 +23,199 @@ import (
//
// These are shared by all fidRefs that point to the same path.
//
-// These are not synchronized because we allow certain operations (file walk)
-// to proceed without having to acquire a write lock. The lock in this
-// structure exists to synchronize high-level, semantic operations, such as the
-// simultaneous creation and deletion of a file.
+// Lock ordering:
+// opMu
+// childMu
//
-// (+) below is the path component string.
+// Two different pathNodes may only be locked if Server.renameMu is held for
+// write, in which case they can be acquired in any order.
type pathNode struct {
- mu sync.RWMutex // See above.
- fidRefs sync.Map // => map[*fidRef]string(+)
- children sync.Map // => map[string(+)]*pathNode
- count int64
+ // opMu synchronizes high-level, sematic operations, such as the
+ // simultaneous creation and deletion of a file.
+ //
+ // opMu does not directly protect any fields in pathNode.
+ opMu sync.RWMutex
+
+ // childMu protects the fields below.
+ childMu sync.RWMutex
+
+ // childNodes maps child path component names to their pathNode.
+ childNodes map[string]*pathNode
+
+ // childRefs maps child path component names to all of the their
+ // references.
+ childRefs map[string]map[*fidRef]struct{}
+
+ // childRefNames maps child references back to their path component
+ // name.
+ childRefNames map[*fidRef]string
+}
+
+func newPathNode() *pathNode {
+ return &pathNode{
+ childNodes: make(map[string]*pathNode),
+ childRefs: make(map[string]map[*fidRef]struct{}),
+ childRefNames: make(map[*fidRef]string),
+ }
+}
+
+// forEachChildRef calls fn for each child reference.
+func (p *pathNode) forEachChildRef(fn func(ref *fidRef, name string)) {
+ p.childMu.RLock()
+ defer p.childMu.RUnlock()
+
+ for name, m := range p.childRefs {
+ for ref := range m {
+ fn(ref, name)
+ }
+ }
+}
+
+// forEachChildNode calls fn for each child pathNode.
+func (p *pathNode) forEachChildNode(fn func(pn *pathNode)) {
+ p.childMu.RLock()
+ defer p.childMu.RUnlock()
+
+ for _, pn := range p.childNodes {
+ fn(pn)
+ }
}
// pathNodeFor returns the path node for the given name, or a new one.
-//
-// Precondition: mu must be held in a readable fashion.
func (p *pathNode) pathNodeFor(name string) *pathNode {
- // Load the existing path node.
- if pn, ok := p.children.Load(name); ok {
- return pn.(*pathNode)
+ p.childMu.RLock()
+ // Fast path, node already exists.
+ if pn, ok := p.childNodes[name]; ok {
+ p.childMu.RUnlock()
+ return pn
+ }
+ p.childMu.RUnlock()
+
+ // Slow path, create a new pathNode for shared use.
+ p.childMu.Lock()
+
+ // Re-check after re-lock.
+ if pn, ok := p.childNodes[name]; ok {
+ p.childMu.Unlock()
+ return pn
}
- // Create a new pathNode for shared use.
- pn, _ := p.children.LoadOrStore(name, new(pathNode))
- return pn.(*pathNode)
+ pn := newPathNode()
+ p.childNodes[name] = pn
+ p.childMu.Unlock()
+ return pn
}
// nameFor returns the name for the given fidRef.
//
-// Precondition: mu must be held in a readable fashion.
+// Precondition: addChild is called for ref before nameFor.
func (p *pathNode) nameFor(ref *fidRef) string {
- if s, ok := p.fidRefs.Load(ref); ok {
- return s.(string)
+ p.childMu.RLock()
+ n, ok := p.childRefNames[ref]
+ p.childMu.RUnlock()
+
+ if !ok {
+ // This should not happen, don't proceed.
+ panic(fmt.Sprintf("expected name for %+v, none found", ref))
}
- // This should not happen, don't proceed.
- panic(fmt.Sprintf("expected name for %+v, none found", ref))
+ return n
}
-// addChild adds a child to the given pathNode.
-//
-// This applies only to an individual fidRef.
+// addChildLocked adds a child reference to p.
//
-// Precondition: mu must be held in a writable fashion.
-func (p *pathNode) addChild(ref *fidRef, name string) {
- if s, ok := p.fidRefs.Load(ref); ok {
+// Precondition: As addChild, plus childMu is locked for write.
+func (p *pathNode) addChildLocked(ref *fidRef, name string) {
+ if n, ok := p.childRefNames[ref]; ok {
// This should not happen, don't proceed.
- panic(fmt.Sprintf("unexpected fidRef %+v with path %q, wanted %q", ref, s, name))
+ panic(fmt.Sprintf("unexpected fidRef %+v with path %q, wanted %q", ref, n, name))
}
- p.fidRefs.Store(ref, name)
+ p.childRefNames[ref] = name
+
+ m, ok := p.childRefs[name]
+ if !ok {
+ m = make(map[*fidRef]struct{})
+ p.childRefs[name] = m
+ }
+
+ m[ref] = struct{}{}
}
-// removeChild removes the given child.
+// addChild adds a child reference to p.
//
-// This applies only to an individual fidRef.
+// Precondition: ref may only be added once at a time.
+func (p *pathNode) addChild(ref *fidRef, name string) {
+ p.childMu.Lock()
+ p.addChildLocked(ref, name)
+ p.childMu.Unlock()
+}
+
+// removeChild removes the given child.
//
-// Precondition: mu must be held in a writable fashion.
+// This applies only to an individual fidRef, which is not required to exist.
func (p *pathNode) removeChild(ref *fidRef) {
- p.fidRefs.Delete(ref)
+ p.childMu.Lock()
+
+ // This ref may not exist anymore. This can occur, e.g., in unlink,
+ // where a removeWithName removes the ref, and then a DecRef on the ref
+ // attempts to remove again.
+ if name, ok := p.childRefNames[ref]; ok {
+ m, ok := p.childRefs[name]
+ if !ok {
+ // This should not happen, don't proceed.
+ p.childMu.Unlock()
+ panic(fmt.Sprintf("name %s missing from childfidRefs", name))
+ }
+
+ delete(m, ref)
+ if len(m) == 0 {
+ delete(p.childRefs, name)
+ }
+ }
+
+ delete(p.childRefNames, ref)
+
+ p.childMu.Unlock()
}
-// removeWithName removes all references with the given name.
+// addPathNodeFor adds an existing pathNode as the node for name.
//
-// The original pathNode is returned by this function, and removed from this
-// pathNode. Any operations on the removed tree must use this value.
+// Preconditions: newName does not exist.
+func (p *pathNode) addPathNodeFor(name string, pn *pathNode) {
+ p.childMu.Lock()
+
+ if opn, ok := p.childNodes[name]; ok {
+ p.childMu.Unlock()
+ panic(fmt.Sprintf("unexpected pathNode %+v with path %q", opn, name))
+ }
+
+ p.childNodes[name] = pn
+ p.childMu.Unlock()
+}
+
+// removeWithName removes all references with the given name.
//
-// The provided function is executed after removal.
+// The provided function is executed after reference removal. The only method
+// it may (transitively) call on this pathNode is addChildLocked.
//
-// Precondition: mu must be held in a writable fashion.
+// If a child pathNode for name exists, it is removed from this pathNode and
+// returned by this function. Any operations on the removed tree must use this
+// value.
func (p *pathNode) removeWithName(name string, fn func(ref *fidRef)) *pathNode {
- p.fidRefs.Range(func(key, value interface{}) bool {
- if value.(string) == name {
- p.fidRefs.Delete(key)
- fn(key.(*fidRef))
+ p.childMu.Lock()
+ defer p.childMu.Unlock()
+
+ if m, ok := p.childRefs[name]; ok {
+ for ref := range m {
+ delete(m, ref)
+ delete(p.childRefNames, ref)
+ fn(ref)
}
- return true
- })
+ }
- // Return the original path node.
- origPathNode := p.pathNodeFor(name)
- p.children.Delete(name)
+ // Return the original path node, if it exists.
+ origPathNode := p.childNodes[name]
+ delete(p.childNodes, name)
return origPathNode
}
diff --git a/pkg/p9/server.go b/pkg/p9/server.go
index f377a6557..b294efbb0 100644
--- a/pkg/p9/server.go
+++ b/pkg/p9/server.go
@@ -21,8 +21,8 @@ import (
"sync/atomic"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/unet"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/unet"
)
// Server is a 9p2000.L server.
@@ -35,7 +35,7 @@ type Server struct {
// These may be across different connections, but rename operations
// must be serialized globally for safely. There is a single pathTree
// for the entire server, and not per connection.
- pathTree pathNode
+ pathTree *pathNode
// renameMu is a global lock protecting rename operations. With this
// lock, we can be certain that any given rename operation can safely
@@ -49,6 +49,7 @@ type Server struct {
func NewServer(attacher Attacher) *Server {
return &Server{
attacher: attacher,
+ pathTree: newPathNode(),
}
}
@@ -201,19 +202,16 @@ func (f *fidRef) maybeParent() *fidRef {
// notifyDelete marks all fidRefs as deleted.
//
-// Precondition: the write lock must be held on the given pathNode.
+// Precondition: this must be called via safelyWrite or safelyGlobal.
func notifyDelete(pn *pathNode) {
// Call on all local references.
- pn.fidRefs.Range(func(key, _ interface{}) bool {
- ref := key.(*fidRef)
+ pn.forEachChildRef(func(ref *fidRef, _ string) {
atomic.StoreUint32(&ref.deleted, 1)
- return true
})
// Call on all subtrees.
- pn.children.Range(func(_, value interface{}) bool {
- notifyDelete(value.(*pathNode))
- return true
+ pn.forEachChildNode(func(pn *pathNode) {
+ notifyDelete(pn)
})
}
@@ -225,28 +223,26 @@ func (f *fidRef) markChildDeleted(name string) {
atomic.StoreUint32(&ref.deleted, 1)
})
- // Mark everything below as deleted.
- notifyDelete(origPathNode)
+ if origPathNode != nil {
+ // Mark all children as deleted.
+ notifyDelete(origPathNode)
+ }
}
// notifyNameChange calls the relevant Renamed method on all nodes in the path,
// recursively. Note that this applies only for subtrees, as these
// notifications do not apply to the actual file whose name has changed.
//
-// Precondition: the write lock must be held on the given pathNode.
+// Precondition: this must be called via safelyGlobal.
func notifyNameChange(pn *pathNode) {
// Call on all local references.
- pn.fidRefs.Range(func(key, value interface{}) bool {
- ref := key.(*fidRef)
- name := value.(string)
+ pn.forEachChildRef(func(ref *fidRef, name string) {
ref.file.Renamed(ref.parent.file, name)
- return true
})
// Call on all subtrees.
- pn.children.Range(func(_, value interface{}) bool {
- notifyNameChange(value.(*pathNode))
- return true
+ pn.forEachChildNode(func(pn *pathNode) {
+ notifyNameChange(pn)
})
}
@@ -256,18 +252,25 @@ func notifyNameChange(pn *pathNode) {
func (f *fidRef) renameChildTo(oldName string, target *fidRef, newName string) {
target.markChildDeleted(newName)
origPathNode := f.pathNode.removeWithName(oldName, func(ref *fidRef) {
+ // N.B. DecRef can take f.pathNode's parent's childMu. This is
+ // allowed because renameMu is held for write via safelyGlobal.
ref.parent.DecRef() // Drop original reference.
ref.parent = target // Change parent.
ref.parent.IncRef() // Acquire new one.
- target.pathNode.addChild(ref, newName)
+ if f.pathNode == target.pathNode {
+ target.pathNode.addChildLocked(ref, newName)
+ } else {
+ target.pathNode.addChild(ref, newName)
+ }
ref.file.Renamed(target.file, newName)
})
- // Replace the previous (now deleted) path node.
- f.pathNode.children.Store(newName, origPathNode)
-
- // Call Renamed on everything above.
- notifyNameChange(origPathNode)
+ if origPathNode != nil {
+ // Replace the previous (now deleted) path node.
+ target.pathNode.addPathNodeFor(newName, origPathNode)
+ // Call Renamed on all children.
+ notifyNameChange(origPathNode)
+ }
}
// safelyRead executes the given operation with the local path node locked.
@@ -275,8 +278,8 @@ func (f *fidRef) renameChildTo(oldName string, target *fidRef, newName string) {
func (f *fidRef) safelyRead(fn func() error) (err error) {
f.server.renameMu.RLock()
defer f.server.renameMu.RUnlock()
- f.pathNode.mu.RLock()
- defer f.pathNode.mu.RUnlock()
+ f.pathNode.opMu.RLock()
+ defer f.pathNode.opMu.RUnlock()
return fn()
}
@@ -285,8 +288,8 @@ func (f *fidRef) safelyRead(fn func() error) (err error) {
func (f *fidRef) safelyWrite(fn func() error) (err error) {
f.server.renameMu.RLock()
defer f.server.renameMu.RUnlock()
- f.pathNode.mu.Lock()
- defer f.pathNode.mu.Unlock()
+ f.pathNode.opMu.Lock()
+ defer f.pathNode.opMu.Unlock()
return fn()
}
diff --git a/pkg/p9/transport.go b/pkg/p9/transport.go
index ef59077ff..5648df589 100644
--- a/pkg/p9/transport.go
+++ b/pkg/p9/transport.go
@@ -22,9 +22,9 @@ import (
"sync"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/fd"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/unet"
+ "gvisor.dev/gvisor/pkg/fd"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/unet"
)
// ErrSocket is returned in cases of a socket issue.
diff --git a/pkg/p9/transport_test.go b/pkg/p9/transport_test.go
index 0f88ff249..cdb3bc841 100644
--- a/pkg/p9/transport_test.go
+++ b/pkg/p9/transport_test.go
@@ -19,8 +19,8 @@ import (
"os"
"testing"
- "gvisor.googlesource.com/gvisor/pkg/fd"
- "gvisor.googlesource.com/gvisor/pkg/unet"
+ "gvisor.dev/gvisor/pkg/fd"
+ "gvisor.dev/gvisor/pkg/unet"
)
const (
@@ -179,6 +179,51 @@ func TestSendClosed(t *testing.T) {
}
}
+func BenchmarkSendRecv(b *testing.B) {
+ server, client, err := unet.SocketPair(false)
+ if err != nil {
+ b.Fatalf("socketpair got err %v expected nil", err)
+ }
+ defer server.Close()
+ defer client.Close()
+
+ // Exchange Rflush messages since these contain no data and therefore incur
+ // no additional marshaling overhead.
+ go func() {
+ for i := 0; i < b.N; i++ {
+ tag, m, err := recv(server, maximumLength, msgRegistry.get)
+ if err != nil {
+ b.Fatalf("recv got err %v expected nil", err)
+ }
+ if tag != Tag(1) {
+ b.Fatalf("got tag %v expected 1", tag)
+ }
+ if _, ok := m.(*Rflush); !ok {
+ b.Fatalf("got message %T expected *Rflush", m)
+ }
+ if err := send(server, Tag(2), &Rflush{}); err != nil {
+ b.Fatalf("send got err %v expected nil", err)
+ }
+ }
+ }()
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ if err := send(client, Tag(1), &Rflush{}); err != nil {
+ b.Fatalf("send got err %v expected nil", err)
+ }
+ tag, m, err := recv(client, maximumLength, msgRegistry.get)
+ if err != nil {
+ b.Fatalf("recv got err %v expected nil", err)
+ }
+ if tag != Tag(2) {
+ b.Fatalf("got tag %v expected 2", tag)
+ }
+ if _, ok := m.(*Rflush); !ok {
+ b.Fatalf("got message %v expected *Rflush", m)
+ }
+ }
+}
+
func init() {
msgRegistry.register(MsgTypeBadDecode, func() message { return &badDecode{} })
}
diff --git a/pkg/procid/BUILD b/pkg/procid/BUILD
index 7c22b763a..697e7a2f4 100644
--- a/pkg/procid/BUILD
+++ b/pkg/procid/BUILD
@@ -9,7 +9,7 @@ go_library(
"procid_amd64.s",
"procid_arm64.s",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/procid",
+ importpath = "gvisor.dev/gvisor/pkg/procid",
visibility = ["//visibility:public"],
)
diff --git a/pkg/rand/BUILD b/pkg/rand/BUILD
index 4eec3a4dd..f4f2001f3 100644
--- a/pkg/rand/BUILD
+++ b/pkg/rand/BUILD
@@ -8,7 +8,7 @@ go_library(
"rand.go",
"rand_linux.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/rand",
+ importpath = "gvisor.dev/gvisor/pkg/rand",
visibility = ["//:sandbox"],
deps = ["@org_golang_x_sys//unix:go_default_library"],
)
diff --git a/pkg/refs/BUILD b/pkg/refs/BUILD
index fc562f821..9c08452fc 100644
--- a/pkg/refs/BUILD
+++ b/pkg/refs/BUILD
@@ -22,8 +22,11 @@ go_library(
"refcounter_state.go",
"weak_ref_list.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/refs",
+ importpath = "gvisor.dev/gvisor/pkg/refs",
visibility = ["//:sandbox"],
+ deps = [
+ "//pkg/log",
+ ],
)
go_test(
diff --git a/pkg/refs/refcounter.go b/pkg/refs/refcounter.go
index 20f515391..6ecbace9e 100644
--- a/pkg/refs/refcounter.go
+++ b/pkg/refs/refcounter.go
@@ -17,9 +17,14 @@
package refs
import (
+ "bytes"
+ "fmt"
"reflect"
+ "runtime"
"sync"
"sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/log"
)
// RefCounter is the interface to be implemented by objects that are reference
@@ -189,6 +194,16 @@ type AtomicRefCount struct {
// how these fields are used.
refCount int64
+ // name is the name of the type which owns this ref count.
+ //
+ // name is immutable after EnableLeakCheck is called.
+ name string
+
+ // stack optionally records the caller of EnableLeakCheck.
+ //
+ // stack is immutable after EnableLeakCheck is called.
+ stack []uintptr
+
// mu protects the list below.
mu sync.Mutex `state:"nosave"`
@@ -196,6 +211,148 @@ type AtomicRefCount struct {
weakRefs weakRefList `state:"nosave"`
}
+// LeakMode configures the leak checker.
+type LeakMode uint32
+
+const (
+ // uninitializedLeakChecking indicates that the leak checker has not yet been initialized.
+ uninitializedLeakChecking LeakMode = iota
+
+ // NoLeakChecking indicates that no effort should be made to check for
+ // leaks.
+ NoLeakChecking
+
+ // LeaksLogWarning indicates that a warning should be logged when leaks
+ // are found.
+ LeaksLogWarning
+
+ // LeaksLogTraces indicates that a trace collected during allocation
+ // should be logged when leaks are found.
+ LeaksLogTraces
+)
+
+// leakMode stores the current mode for the reference leak checker.
+//
+// Values must be one of the LeakMode values.
+//
+// leakMode must be accessed atomically.
+var leakMode uint32
+
+// SetLeakMode configures the reference leak checker.
+func SetLeakMode(mode LeakMode) {
+ atomic.StoreUint32(&leakMode, uint32(mode))
+}
+
+const maxStackFrames = 40
+
+type fileLine struct {
+ file string
+ line int
+}
+
+// A stackKey is a representation of a stack frame for use as a map key.
+//
+// The fileLine type is used as PC values seem to vary across collections, even
+// for the same call stack.
+type stackKey [maxStackFrames]fileLine
+
+var stackCache = struct {
+ sync.Mutex
+ entries map[stackKey][]uintptr
+}{entries: map[stackKey][]uintptr{}}
+
+func makeStackKey(pcs []uintptr) stackKey {
+ frames := runtime.CallersFrames(pcs)
+ var key stackKey
+ keySlice := key[:0]
+ for {
+ frame, more := frames.Next()
+ keySlice = append(keySlice, fileLine{frame.File, frame.Line})
+
+ if !more || len(keySlice) == len(key) {
+ break
+ }
+ }
+ return key
+}
+
+func recordStack() []uintptr {
+ pcs := make([]uintptr, maxStackFrames)
+ n := runtime.Callers(1, pcs)
+ if n == 0 {
+ // No pcs available. Stop now.
+ //
+ // This can happen if the first argument to runtime.Callers
+ // is large.
+ return nil
+ }
+ pcs = pcs[:n]
+ key := makeStackKey(pcs)
+ stackCache.Lock()
+ v, ok := stackCache.entries[key]
+ if !ok {
+ // Reallocate to prevent pcs from escaping.
+ v = append([]uintptr(nil), pcs...)
+ stackCache.entries[key] = v
+ }
+ stackCache.Unlock()
+ return v
+}
+
+func formatStack(pcs []uintptr) string {
+ frames := runtime.CallersFrames(pcs)
+ var trace bytes.Buffer
+ for {
+ frame, more := frames.Next()
+ fmt.Fprintf(&trace, "%s:%d: %s\n", frame.File, frame.Line, frame.Function)
+
+ if !more {
+ break
+ }
+ }
+ return trace.String()
+}
+
+func (r *AtomicRefCount) finalize() {
+ var note string
+ switch LeakMode(atomic.LoadUint32(&leakMode)) {
+ case NoLeakChecking:
+ return
+ case uninitializedLeakChecking:
+ note = "(Leak checker uninitialized): "
+ }
+ if n := r.ReadRefs(); n != 0 {
+ msg := fmt.Sprintf("%sAtomicRefCount %p owned by %q garbage collected with ref count of %d (want 0)", note, r, r.name, n)
+ if len(r.stack) != 0 {
+ msg += ":\nCaller:\n" + formatStack(r.stack)
+ }
+ log.Warningf(msg)
+ }
+}
+
+// EnableLeakCheck checks for reference leaks when the AtomicRefCount gets
+// garbage collected.
+//
+// This function adds a finalizer to the AtomicRefCount, so the AtomicRefCount
+// must be at the beginning of its parent.
+//
+// name is a friendly name that will be listed as the owner of the
+// AtomicRefCount in logs. It should be the name of the parent type, including
+// package.
+func (r *AtomicRefCount) EnableLeakCheck(name string) {
+ if name == "" {
+ panic("invalid name")
+ }
+ switch LeakMode(atomic.LoadUint32(&leakMode)) {
+ case NoLeakChecking:
+ return
+ case LeaksLogTraces:
+ r.stack = recordStack()
+ }
+ r.name = name
+ runtime.SetFinalizer(r, (*AtomicRefCount).finalize)
+}
+
// ReadRefs returns the current number of references. The returned count is
// inherently racy and is unsafe to use without external synchronization.
func (r *AtomicRefCount) ReadRefs() int64 {
@@ -208,6 +365,8 @@ func (r *AtomicRefCount) ReadRefs() int64 {
//
// The sanity check here is limited to real references, since if they have
// dropped beneath zero then the object should have been destroyed.
+//
+//go:nosplit
func (r *AtomicRefCount) IncRef() {
if v := atomic.AddInt64(&r.refCount, 1); v <= 0 {
panic("Incrementing non-positive ref count")
@@ -222,6 +381,8 @@ func (r *AtomicRefCount) IncRef() {
// To do this safely without a loop, a speculative reference is first acquired
// on the object. This allows multiple concurrent TryIncRef calls to
// distinguish other TryIncRef calls from genuine references held.
+//
+//go:nosplit
func (r *AtomicRefCount) TryIncRef() bool {
const speculativeRef = 1 << 32
v := atomic.AddInt64(&r.refCount, speculativeRef)
@@ -263,6 +424,7 @@ func (r *AtomicRefCount) dropWeakRef(w *WeakRef) {
// B: DecRef [real decrease]
// A: TryIncRef [transform speculative to real]
//
+//go:nosplit
func (r *AtomicRefCount) DecRefWithDestructor(destroy func()) {
switch v := atomic.AddInt64(&r.refCount, -1); {
case v < -1:
@@ -298,6 +460,8 @@ func (r *AtomicRefCount) DecRefWithDestructor(destroy func()) {
}
// DecRef decrements this object's reference count.
+//
+//go:nosplit
func (r *AtomicRefCount) DecRef() {
r.DecRefWithDestructor(nil)
}
diff --git a/pkg/seccomp/BUILD b/pkg/seccomp/BUILD
index 2a59ebbce..d1024e49d 100644
--- a/pkg/seccomp/BUILD
+++ b/pkg/seccomp/BUILD
@@ -27,7 +27,7 @@ go_library(
"seccomp_rules.go",
"seccomp_unsafe.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/seccomp",
+ importpath = "gvisor.dev/gvisor/pkg/seccomp",
visibility = ["//visibility:public"],
deps = [
"//pkg/abi/linux",
diff --git a/pkg/seccomp/seccomp.go b/pkg/seccomp/seccomp.go
index cc142a497..c7503f2cc 100644
--- a/pkg/seccomp/seccomp.go
+++ b/pkg/seccomp/seccomp.go
@@ -20,9 +20,9 @@ import (
"reflect"
"sort"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/bpf"
- "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/bpf"
+ "gvisor.dev/gvisor/pkg/log"
)
const (
@@ -244,7 +244,7 @@ func addSyscallArgsCheck(p *bpf.ProgramBuilder, rules []Rule, action linux.BPFAc
return nil
}
-// buildBSTProgram converts a binary tree started in 'root' into BPF code. The ouline of the code
+// buildBSTProgram converts a binary tree started in 'root' into BPF code. The outline of the code
// is as follows:
//
// // SYS_PIPE(22), root
diff --git a/pkg/seccomp/seccomp_amd64.go b/pkg/seccomp/seccomp_amd64.go
index 02dfb8d9f..00bf332c1 100644
--- a/pkg/seccomp/seccomp_amd64.go
+++ b/pkg/seccomp/seccomp_amd64.go
@@ -17,7 +17,7 @@
package seccomp
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/abi/linux"
)
const (
diff --git a/pkg/seccomp/seccomp_arm64.go b/pkg/seccomp/seccomp_arm64.go
index b575bcdbf..b62133f21 100644
--- a/pkg/seccomp/seccomp_arm64.go
+++ b/pkg/seccomp/seccomp_arm64.go
@@ -17,7 +17,7 @@
package seccomp
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/abi/linux"
)
const (
diff --git a/pkg/seccomp/seccomp_test.go b/pkg/seccomp/seccomp_test.go
index 47ecac6f7..353686ed3 100644
--- a/pkg/seccomp/seccomp_test.go
+++ b/pkg/seccomp/seccomp_test.go
@@ -27,9 +27,9 @@ import (
"testing"
"time"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/binary"
- "gvisor.googlesource.com/gvisor/pkg/bpf"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/bpf"
)
type seccompData struct {
diff --git a/pkg/seccomp/seccomp_test_victim.go b/pkg/seccomp/seccomp_test_victim.go
index afc2f755f..62ae1fd9f 100644
--- a/pkg/seccomp/seccomp_test_victim.go
+++ b/pkg/seccomp/seccomp_test_victim.go
@@ -22,7 +22,7 @@ import (
"os"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/seccomp"
+ "gvisor.dev/gvisor/pkg/seccomp"
)
func main() {
diff --git a/pkg/seccomp/seccomp_unsafe.go b/pkg/seccomp/seccomp_unsafe.go
index ebb6397e8..0a3d92854 100644
--- a/pkg/seccomp/seccomp_unsafe.go
+++ b/pkg/seccomp/seccomp_unsafe.go
@@ -18,7 +18,7 @@ import (
"syscall"
"unsafe"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/abi/linux"
)
// sockFprog is sock_fprog taken from <linux/filter.h>.
diff --git a/pkg/secio/BUILD b/pkg/secio/BUILD
index 2b4b87c61..f38fb39f3 100644
--- a/pkg/secio/BUILD
+++ b/pkg/secio/BUILD
@@ -8,7 +8,7 @@ go_library(
"full_reader.go",
"secio.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/secio",
+ importpath = "gvisor.dev/gvisor/pkg/secio",
visibility = ["//pkg/sentry:internal"],
)
diff --git a/pkg/segment/set.go b/pkg/segment/set.go
index 982eb3fdd..03e4f258f 100644
--- a/pkg/segment/set.go
+++ b/pkg/segment/set.go
@@ -1288,7 +1288,7 @@ func (s *Set) String() string {
return s.root.String()
}
-// String stringifes a node (and all of its children) for debugging.
+// String stringifies a node (and all of its children) for debugging.
func (n *node) String() string {
var buf bytes.Buffer
n.writeDebugString(&buf, "")
diff --git a/pkg/segment/test/BUILD b/pkg/segment/test/BUILD
index 81e929b8c..694486296 100644
--- a/pkg/segment/test/BUILD
+++ b/pkg/segment/test/BUILD
@@ -38,7 +38,7 @@ go_library(
"int_set.go",
"set_functions.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/segment/segment",
+ importpath = "gvisor.dev/gvisor/pkg/segment/segment",
deps = [
"//pkg/state",
],
diff --git a/pkg/sentry/arch/BUILD b/pkg/sentry/arch/BUILD
index 0c044bc33..7aace2d7b 100644
--- a/pkg/sentry/arch/BUILD
+++ b/pkg/sentry/arch/BUILD
@@ -21,7 +21,7 @@ go_library(
"stack.go",
"syscalls_amd64.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/arch",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/arch",
visibility = ["//:sandbox"],
deps = [
":registers_go_proto",
@@ -44,7 +44,7 @@ proto_library(
go_proto_library(
name = "registers_go_proto",
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/arch/registers_go_proto",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto",
proto = ":registers_proto",
visibility = ["//visibility:public"],
)
diff --git a/pkg/sentry/arch/arch.go b/pkg/sentry/arch/arch.go
index 53f0c9018..ace7d5b18 100644
--- a/pkg/sentry/arch/arch.go
+++ b/pkg/sentry/arch/arch.go
@@ -20,11 +20,11 @@ import (
"fmt"
"io"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/cpuid"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/limits"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/cpuid"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/limits"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// Arch describes an architecture.
diff --git a/pkg/sentry/arch/arch_amd64.go b/pkg/sentry/arch/arch_amd64.go
index 135c2ee1f..9e7db8b30 100644
--- a/pkg/sentry/arch/arch_amd64.go
+++ b/pkg/sentry/arch/arch_amd64.go
@@ -22,10 +22,10 @@ import (
"math/rand"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/binary"
- "gvisor.googlesource.com/gvisor/pkg/cpuid"
- "gvisor.googlesource.com/gvisor/pkg/sentry/limits"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/cpuid"
+ "gvisor.dev/gvisor/pkg/sentry/limits"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// Host specifies the host architecture.
diff --git a/pkg/sentry/arch/arch_state_x86.go b/pkg/sentry/arch/arch_state_x86.go
index bb52d8db0..9061fcc86 100644
--- a/pkg/sentry/arch/arch_state_x86.go
+++ b/pkg/sentry/arch/arch_state_x86.go
@@ -18,8 +18,8 @@ import (
"fmt"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/cpuid"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/cpuid"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// ErrFloatingPoint indicates a failed restore due to unusable floating point
diff --git a/pkg/sentry/arch/arch_x86.go b/pkg/sentry/arch/arch_x86.go
index 4d167ce98..9294ac773 100644
--- a/pkg/sentry/arch/arch_x86.go
+++ b/pkg/sentry/arch/arch_x86.go
@@ -22,12 +22,12 @@ import (
"sync"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/binary"
- "gvisor.googlesource.com/gvisor/pkg/cpuid"
- "gvisor.googlesource.com/gvisor/pkg/log"
- rpb "gvisor.googlesource.com/gvisor/pkg/sentry/arch/registers_go_proto"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/cpuid"
+ "gvisor.dev/gvisor/pkg/log"
+ rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// System-related constants for x86.
diff --git a/pkg/sentry/arch/auxv.go b/pkg/sentry/arch/auxv.go
index 80c923103..4546b2ef9 100644
--- a/pkg/sentry/arch/auxv.go
+++ b/pkg/sentry/arch/auxv.go
@@ -15,7 +15,7 @@
package arch
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// An AuxEntry represents an entry in an ELF auxiliary vector.
diff --git a/pkg/sentry/arch/signal_amd64.go b/pkg/sentry/arch/signal_amd64.go
index aa030fd70..febd6f9b9 100644
--- a/pkg/sentry/arch/signal_amd64.go
+++ b/pkg/sentry/arch/signal_amd64.go
@@ -21,9 +21,9 @@ import (
"math"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// SignalAct represents the action that should be taken when a signal is
diff --git a/pkg/sentry/arch/signal_stack.go b/pkg/sentry/arch/signal_stack.go
index a442f9fdc..5a3228113 100644
--- a/pkg/sentry/arch/signal_stack.go
+++ b/pkg/sentry/arch/signal_stack.go
@@ -17,7 +17,7 @@
package arch
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
const (
diff --git a/pkg/sentry/arch/stack.go b/pkg/sentry/arch/stack.go
index 7e6324e82..7472c3c61 100644
--- a/pkg/sentry/arch/stack.go
+++ b/pkg/sentry/arch/stack.go
@@ -18,8 +18,8 @@ import (
"encoding/binary"
"fmt"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// Stack is a simple wrapper around a usermem.IO and an address.
diff --git a/pkg/sentry/context/BUILD b/pkg/sentry/context/BUILD
index a3c8d0177..8dc1a77b1 100644
--- a/pkg/sentry/context/BUILD
+++ b/pkg/sentry/context/BUILD
@@ -5,7 +5,7 @@ package(licenses = ["notice"])
go_library(
name = "context",
srcs = ["context.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/context",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/context",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/amutex",
diff --git a/pkg/sentry/context/context.go b/pkg/sentry/context/context.go
index d70f3a5c3..dfd62cbdb 100644
--- a/pkg/sentry/context/context.go
+++ b/pkg/sentry/context/context.go
@@ -16,8 +16,8 @@
package context
import (
- "gvisor.googlesource.com/gvisor/pkg/amutex"
- "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/amutex"
+ "gvisor.dev/gvisor/pkg/log"
)
type contextID int
diff --git a/pkg/sentry/context/contexttest/BUILD b/pkg/sentry/context/contexttest/BUILD
index d17b1bdcf..3b6841b7e 100644
--- a/pkg/sentry/context/contexttest/BUILD
+++ b/pkg/sentry/context/contexttest/BUILD
@@ -6,7 +6,7 @@ go_library(
name = "contexttest",
testonly = 1,
srcs = ["contexttest.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/context/contexttest",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/memutil",
diff --git a/pkg/sentry/context/contexttest/contexttest.go b/pkg/sentry/context/contexttest/contexttest.go
index 83da40711..15cf086a9 100644
--- a/pkg/sentry/context/contexttest/contexttest.go
+++ b/pkg/sentry/context/contexttest/contexttest.go
@@ -21,15 +21,15 @@ import (
"testing"
"time"
- "gvisor.googlesource.com/gvisor/pkg/memutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/limits"
- "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ptrace"
- "gvisor.googlesource.com/gvisor/pkg/sentry/uniqueid"
+ "gvisor.dev/gvisor/pkg/memutil"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/limits"
+ "gvisor.dev/gvisor/pkg/sentry/pgalloc"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/platform/ptrace"
+ "gvisor.dev/gvisor/pkg/sentry/uniqueid"
)
// Context returns a Context that may be used in tests. Uses ptrace as the
@@ -59,6 +59,7 @@ func Context(tb testing.TB) context.Context {
l: limits.NewLimitSet(),
mf: mf,
platform: p,
+ creds: auth.NewAnonymousCredentials(),
otherValues: make(map[interface{}]interface{}),
}
}
@@ -70,6 +71,7 @@ type TestContext struct {
l *limits.LimitSet
mf *pgalloc.MemoryFile
platform platform.Platform
+ creds *auth.Credentials
otherValues map[interface{}]interface{}
}
@@ -108,6 +110,8 @@ func (t *TestContext) RegisterValue(key, value interface{}) {
// Value implements context.Context.
func (t *TestContext) Value(key interface{}) interface{} {
switch key {
+ case auth.CtxCredentials:
+ return t.creds
case limits.CtxLimits:
return t.l
case pgalloc.CtxMemoryFile:
diff --git a/pkg/sentry/control/BUILD b/pkg/sentry/control/BUILD
index 5052bcc0d..bf802d1b6 100644
--- a/pkg/sentry/control/BUILD
+++ b/pkg/sentry/control/BUILD
@@ -6,11 +6,12 @@ go_library(
name = "control",
srcs = [
"control.go",
+ "logging.go",
"pprof.go",
"proc.go",
"state.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/control",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/control",
visibility = [
"//pkg/sentry:internal",
],
@@ -22,12 +23,13 @@ go_library(
"//pkg/sentry/fs/host",
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/auth",
- "//pkg/sentry/kernel/kdefs",
"//pkg/sentry/kernel/time",
"//pkg/sentry/limits",
"//pkg/sentry/state",
+ "//pkg/sentry/strace",
"//pkg/sentry/usage",
"//pkg/sentry/watchdog",
+ "//pkg/tcpip/link/sniffer",
"//pkg/urpc",
],
)
diff --git a/pkg/sentry/control/logging.go b/pkg/sentry/control/logging.go
new file mode 100644
index 000000000..811f24324
--- /dev/null
+++ b/pkg/sentry/control/logging.go
@@ -0,0 +1,136 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package control
+
+import (
+ "fmt"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/strace"
+ "gvisor.dev/gvisor/pkg/tcpip/link/sniffer"
+)
+
+// LoggingArgs are the arguments to use for changing the logging
+// level and strace list.
+type LoggingArgs struct {
+ // SetLevel is a flag used to indicate that we should update
+ // the logging level. We should be able to change the strace
+ // list without affecting the logging level and vice versa.
+ SetLevel bool
+
+ // Level is the log level that will be set if SetLevel is true.
+ Level log.Level
+
+ // SetLogPackets indicates that we should update the log packets flag.
+ SetLogPackets bool
+
+ // LogPackets is the actual value to set for LogPackets.
+ // SetLogPackets must be enabled to indicate that we're changing
+ // the value.
+ LogPackets bool
+
+ // SetStrace is a flag used to indicate that strace related
+ // arguments were passed in.
+ SetStrace bool
+
+ // EnableStrace is a flag from the CLI that specifies whether to
+ // enable strace at all. If this flag is false then a completely
+ // pristine copy of the syscall table will be swapped in. This
+ // approach is used to remain consistent with an empty strace
+ // whitelist meaning trace all system calls.
+ EnableStrace bool
+
+ // Strace is the whitelist of syscalls to trace to log. If this
+ // and StraceEventWhitelist are empty trace all system calls.
+ StraceWhitelist []string
+
+ // SetEventStrace is a flag used to indicate that event strace
+ // related arguments were passed in.
+ SetEventStrace bool
+
+ // StraceEventWhitelist is the whitelist of syscalls to trace
+ // to event log.
+ StraceEventWhitelist []string
+}
+
+// Logging provides functions related to logging.
+type Logging struct{}
+
+// Change will change the log level and strace arguments. Although
+// this functions signature requires an error it never acctually
+// return san error. It's required by the URPC interface.
+// Additionally, it may look odd that this is the only method
+// attached to an empty struct but this is also part of how
+// URPC dispatches.
+func (l *Logging) Change(args *LoggingArgs, code *int) error {
+ if args.SetLevel {
+ // Logging uses an atomic for the level so this is thread safe.
+ log.SetLevel(args.Level)
+ }
+
+ if args.SetLogPackets {
+ if args.LogPackets {
+ atomic.StoreUint32(&sniffer.LogPackets, 1)
+ } else {
+ atomic.StoreUint32(&sniffer.LogPackets, 0)
+ }
+ log.Infof("LogPackets set to: %v", atomic.LoadUint32(&sniffer.LogPackets))
+ }
+
+ if args.SetStrace {
+ if err := l.configureStrace(args); err != nil {
+ return fmt.Errorf("error configuring strace: %v", err)
+ }
+ }
+
+ if args.SetEventStrace {
+ if err := l.configureEventStrace(args); err != nil {
+ return fmt.Errorf("error configuring event strace: %v", err)
+ }
+ }
+
+ return nil
+}
+
+func (l *Logging) configureStrace(args *LoggingArgs) error {
+ if args.EnableStrace {
+ // Install the whitelist specified.
+ if len(args.StraceWhitelist) > 0 {
+ if err := strace.Enable(args.StraceWhitelist, strace.SinkTypeLog); err != nil {
+ return err
+ }
+ } else {
+ // For convenience, if strace is enabled but whitelist
+ // is empty, enable everything to log.
+ strace.EnableAll(strace.SinkTypeLog)
+ }
+ } else {
+ // Uninstall all strace functions.
+ strace.Disable(strace.SinkTypeLog)
+ }
+ return nil
+}
+
+func (l *Logging) configureEventStrace(args *LoggingArgs) error {
+ if len(args.StraceEventWhitelist) > 0 {
+ if err := strace.Enable(args.StraceEventWhitelist, strace.SinkTypeEvent); err != nil {
+ return err
+ }
+ } else {
+ strace.Disable(strace.SinkTypeEvent)
+ }
+ return nil
+}
diff --git a/pkg/sentry/control/pprof.go b/pkg/sentry/control/pprof.go
index d63916600..1f78d54a2 100644
--- a/pkg/sentry/control/pprof.go
+++ b/pkg/sentry/control/pprof.go
@@ -21,8 +21,8 @@ import (
"runtime/trace"
"sync"
- "gvisor.googlesource.com/gvisor/pkg/fd"
- "gvisor.googlesource.com/gvisor/pkg/urpc"
+ "gvisor.dev/gvisor/pkg/fd"
+ "gvisor.dev/gvisor/pkg/urpc"
)
var errNoOutput = errors.New("no output writer provided")
diff --git a/pkg/sentry/control/proc.go b/pkg/sentry/control/proc.go
index f7f02a3e1..6ae60c5cb 100644
--- a/pkg/sentry/control/proc.go
+++ b/pkg/sentry/control/proc.go
@@ -23,16 +23,15 @@ import (
"text/tabwriter"
"time"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/host"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/limits"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
- "gvisor.googlesource.com/gvisor/pkg/urpc"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/host"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/limits"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/urpc"
)
// Proc includes task-related functions.
@@ -123,9 +122,8 @@ func ExecAsync(proc *Proc, args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadID
// TTYFileOperations that wraps the TTY is also returned.
func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadID, *host.TTYFileOperations, error) {
// Import file descriptors.
- l := limits.NewLimitSet()
- fdm := proc.Kernel.NewFDMap()
- defer fdm.DecRef()
+ fdTable := proc.Kernel.NewFDTable()
+ defer fdTable.DecRef()
// No matter what happens, we should close all files in the FilePayload
// before returning. Any files that are imported will be duped.
@@ -149,9 +147,9 @@ func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadI
WorkingDirectory: args.WorkingDirectory,
Root: args.Root,
Credentials: creds,
- FDMap: fdm,
+ FDTable: fdTable,
Umask: 0022,
- Limits: l,
+ Limits: limits.NewLimitSet(),
MaxSymlinkTraversals: linux.MaxSymlinkTraversals,
UTSNamespace: proc.Kernel.RootUTSNamespace(),
IPCNamespace: proc.Kernel.RootIPCNamespace(),
@@ -212,7 +210,7 @@ func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadI
}
// Add the file to the FD map.
- if err := fdm.NewFDAt(kdefs.FD(appFD), appFile, kernel.FDFlags{}, l); err != nil {
+ if err := fdTable.NewFDAt(ctx, int32(appFD), appFile, kernel.FDFlags{}); err != nil {
return nil, 0, nil, err
}
}
diff --git a/pkg/sentry/control/proc_test.go b/pkg/sentry/control/proc_test.go
index b7895d03c..d8ada2694 100644
--- a/pkg/sentry/control/proc_test.go
+++ b/pkg/sentry/control/proc_test.go
@@ -17,9 +17,9 @@ package control
import (
"testing"
- "gvisor.googlesource.com/gvisor/pkg/log"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/log"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
)
func init() {
diff --git a/pkg/sentry/control/state.go b/pkg/sentry/control/state.go
index 11efcaba1..41feeffe3 100644
--- a/pkg/sentry/control/state.go
+++ b/pkg/sentry/control/state.go
@@ -17,11 +17,11 @@ package control
import (
"errors"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/state"
- "gvisor.googlesource.com/gvisor/pkg/sentry/watchdog"
- "gvisor.googlesource.com/gvisor/pkg/urpc"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/state"
+ "gvisor.dev/gvisor/pkg/sentry/watchdog"
+ "gvisor.dev/gvisor/pkg/urpc"
)
// ErrInvalidFiles is returned when the urpc call to Save does not include an
diff --git a/pkg/sentry/device/BUILD b/pkg/sentry/device/BUILD
index 4ccf0674d..7e8918722 100644
--- a/pkg/sentry/device/BUILD
+++ b/pkg/sentry/device/BUILD
@@ -5,7 +5,7 @@ load("//tools/go_stateify:defs.bzl", "go_library", "go_test")
go_library(
name = "device",
srcs = ["device.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/device",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/device",
visibility = ["//pkg/sentry:internal"],
deps = ["//pkg/abi/linux"],
)
diff --git a/pkg/sentry/device/device.go b/pkg/sentry/device/device.go
index 458d03b30..47945d1a7 100644
--- a/pkg/sentry/device/device.go
+++ b/pkg/sentry/device/device.go
@@ -22,7 +22,7 @@ import (
"sync"
"sync/atomic"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/abi/linux"
)
// Registry tracks all simple devices and related state on the system for
diff --git a/pkg/sentry/fs/BUILD b/pkg/sentry/fs/BUILD
index 142a00840..d7259b47b 100644
--- a/pkg/sentry/fs/BUILD
+++ b/pkg/sentry/fs/BUILD
@@ -43,7 +43,7 @@ go_library(
"splice.go",
"sync.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/fs",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
@@ -69,6 +69,7 @@ go_library(
"//pkg/state",
"//pkg/syserror",
"//pkg/waiter",
+ "//third_party/gvsync",
],
)
diff --git a/pkg/sentry/fs/README.md b/pkg/sentry/fs/README.md
index f53ed3eaa..db4a1b730 100644
--- a/pkg/sentry/fs/README.md
+++ b/pkg/sentry/fs/README.md
@@ -69,7 +69,7 @@ Specifically this state is:
- An `fs.MountManager` containing mount points.
-- A `kernel.FDMap` containing pointers to open files.
+- A `kernel.FDTable` containing pointers to open files.
Anything else managed by the VFS that can be easily loaded into memory from a
filesystem is synced back to those filesystems and is not saved. Examples are
@@ -126,7 +126,7 @@ A mount point is restored in two steps:
- Second, during state.Load, each `fs.MountedFilesystem` optionally searches
for a mount in the `fs.RestoreEnvironment` that matches its saved device
- name. The `fs.MountedFilesystem` then restablishes a pointer to the root of
+ name. The `fs.MountedFilesystem` then reestablishes a pointer to the root of
the mounted filesystem. For example, the mount specification provides the
network connection for a mounted remote filesystem client to communicate
with its remote file server. The `fs.MountedFilesystem` also trivially loads
@@ -158,7 +158,7 @@ Otherwise an `fs.File` restores flags, an offset, and a unique identifier (only
used internally).
It may use the `fs.Inode`, which it indirectly holds a reference on through the
-`fs.Dirent`, to restablish an open file handle on the backing filesystem (e.g.
+`fs.Dirent`, to reestablish an open file handle on the backing filesystem (e.g.
to continue reading and writing).
## Overlay
diff --git a/pkg/sentry/fs/anon/BUILD b/pkg/sentry/fs/anon/BUILD
index 2111df2e8..ae1c9cf76 100644
--- a/pkg/sentry/fs/anon/BUILD
+++ b/pkg/sentry/fs/anon/BUILD
@@ -8,7 +8,7 @@ go_library(
"anon.go",
"device.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs/anon",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/fs/anon",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
diff --git a/pkg/sentry/fs/anon/anon.go b/pkg/sentry/fs/anon/anon.go
index a6ea8b9e7..7323c7222 100644
--- a/pkg/sentry/fs/anon/anon.go
+++ b/pkg/sentry/fs/anon/anon.go
@@ -17,11 +17,11 @@
package anon
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// NewInode constructs an anonymous Inode that is not associated
@@ -33,7 +33,7 @@ func NewInode(ctx context.Context) *fs.Inode {
User: fs.PermMask{Read: true, Write: true},
}, linux.ANON_INODE_FS_MAGIC),
}
- return fs.NewInode(iops, fs.NewPseudoMountSource(), fs.StableAttr{
+ return fs.NewInode(ctx, iops, fs.NewPseudoMountSource(ctx), fs.StableAttr{
Type: fs.Anonymous,
DeviceID: PseudoDevice.DeviceID(),
InodeID: PseudoDevice.NextIno(),
diff --git a/pkg/sentry/fs/anon/device.go b/pkg/sentry/fs/anon/device.go
index 5927bd11e..d9ac14956 100644
--- a/pkg/sentry/fs/anon/device.go
+++ b/pkg/sentry/fs/anon/device.go
@@ -15,7 +15,7 @@
package anon
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/device"
+ "gvisor.dev/gvisor/pkg/sentry/device"
)
// PseudoDevice is the device on which all anonymous inodes reside.
diff --git a/pkg/sentry/fs/ashmem/BUILD b/pkg/sentry/fs/ashmem/BUILD
deleted file mode 100644
index ef1c31a3e..000000000
--- a/pkg/sentry/fs/ashmem/BUILD
+++ /dev/null
@@ -1,65 +0,0 @@
-package(licenses = ["notice"])
-
-load("//tools/go_stateify:defs.bzl", "go_library", "go_test")
-load("//tools/go_generics:defs.bzl", "go_template_instance")
-
-go_library(
- name = "ashmem",
- srcs = [
- "area.go",
- "device.go",
- "pin_board.go",
- "uint64_range.go",
- "uint64_set.go",
- ],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ashmem",
- visibility = ["//pkg/sentry:internal"],
- deps = [
- "//pkg/abi/linux",
- "//pkg/sentry/arch",
- "//pkg/sentry/context",
- "//pkg/sentry/fs",
- "//pkg/sentry/fs/fsutil",
- "//pkg/sentry/fs/tmpfs",
- "//pkg/sentry/kernel/time",
- "//pkg/sentry/memmap",
- "//pkg/sentry/usage",
- "//pkg/sentry/usermem",
- "//pkg/syserror",
- "//pkg/waiter",
- ],
-)
-
-go_test(
- name = "ashmem_test",
- size = "small",
- srcs = ["pin_board_test.go"],
- embed = [":ashmem"],
- deps = [
- "//pkg/abi/linux",
- "//pkg/sentry/usermem",
- ],
-)
-
-go_template_instance(
- name = "uint64_range",
- out = "uint64_range.go",
- package = "ashmem",
- template = "//pkg/segment:generic_range",
- types = {
- "T": "uint64",
- },
-)
-
-go_template_instance(
- name = "uint64_set",
- out = "uint64_set.go",
- package = "ashmem",
- template = "//pkg/segment:generic_set",
- types = {
- "Key": "uint64",
- "Range": "Range",
- "Value": "noValue",
- "Functions": "setFunctions",
- },
-)
diff --git a/pkg/sentry/fs/ashmem/area.go b/pkg/sentry/fs/ashmem/area.go
deleted file mode 100644
index b4b0cc08b..000000000
--- a/pkg/sentry/fs/ashmem/area.go
+++ /dev/null
@@ -1,308 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package ashmem
-
-import (
- "sync"
-
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/tmpfs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
-)
-
-const (
- // namePrefix is the name prefix assumed and forced by the Linux implementation.
- namePrefix = "dev/ashmem"
-
- // nameLen is the maximum name length.
- nameLen = 256
-)
-
-// Area implements fs.FileOperations.
-//
-// +stateify savable
-type Area struct {
- fsutil.FileNoFsync `state:"nosave"`
- fsutil.FileNoSplice `state:"nosave"`
- fsutil.FileNoopFlush `state:"nosave"`
- fsutil.FileNotDirReaddir `state:"nosave"`
- fsutil.FileUseInodeUnstableAttr `state:"nosave"`
- waiter.AlwaysReady `state:"nosave"`
-
- ad *Device
-
- // mu protects fields below.
- mu sync.Mutex `state:"nosave"`
- tmpfsFile *fs.File
- name string
- size uint64
- perms usermem.AccessType
- pb *PinBoard
-}
-
-// Release implements fs.FileOperations.Release.
-func (a *Area) Release() {
- a.mu.Lock()
- defer a.mu.Unlock()
- if a.tmpfsFile != nil {
- a.tmpfsFile.DecRef()
- a.tmpfsFile = nil
- }
-}
-
-// Seek implements fs.FileOperations.Seek.
-func (a *Area) Seek(ctx context.Context, file *fs.File, whence fs.SeekWhence, offset int64) (int64, error) {
- a.mu.Lock()
- defer a.mu.Unlock()
- if a.size == 0 {
- return 0, syserror.EINVAL
- }
- if a.tmpfsFile == nil {
- return 0, syserror.EBADF
- }
- return a.tmpfsFile.FileOperations.Seek(ctx, file, whence, offset)
-}
-
-// Read implements fs.FileOperations.Read.
-func (a *Area) Read(ctx context.Context, file *fs.File, dst usermem.IOSequence, offset int64) (int64, error) {
- a.mu.Lock()
- defer a.mu.Unlock()
- if a.size == 0 {
- return 0, nil
- }
- if a.tmpfsFile == nil {
- return 0, syserror.EBADF
- }
- return a.tmpfsFile.FileOperations.Read(ctx, file, dst, offset)
-}
-
-// Write implements fs.FileOperations.Write.
-func (a *Area) Write(ctx context.Context, file *fs.File, src usermem.IOSequence, offset int64) (int64, error) {
- return 0, syserror.ENOSYS
-}
-
-// ConfigureMMap implements fs.FileOperations.ConfigureMMap.
-func (a *Area) ConfigureMMap(ctx context.Context, file *fs.File, opts *memmap.MMapOpts) error {
- a.mu.Lock()
- defer a.mu.Unlock()
- if a.size == 0 {
- return syserror.EINVAL
- }
-
- if !a.perms.SupersetOf(opts.Perms) {
- return syserror.EPERM
- }
- opts.MaxPerms = opts.MaxPerms.Intersect(a.perms)
-
- if a.tmpfsFile == nil {
- tmpfsInodeOps := tmpfs.NewInMemoryFile(ctx, usage.Tmpfs, fs.UnstableAttr{})
- tmpfsInode := fs.NewInode(tmpfsInodeOps, fs.NewPseudoMountSource(), fs.StableAttr{})
- dirent := fs.NewDirent(tmpfsInode, namePrefix+"/"+a.name)
- tmpfsFile, err := tmpfsInode.GetFile(ctx, dirent, fs.FileFlags{Read: true, Write: true})
- // Drop the extra reference on the Dirent.
- dirent.DecRef()
-
- if err != nil {
- return err
- }
-
- // Truncate to the size set by ASHMEM_SET_SIZE ioctl.
- err = tmpfsInodeOps.Truncate(ctx, tmpfsInode, int64(a.size))
- if err != nil {
- return err
- }
- a.tmpfsFile = tmpfsFile
- a.pb = NewPinBoard()
- }
-
- return a.tmpfsFile.ConfigureMMap(ctx, opts)
-}
-
-// Ioctl implements fs.FileOperations.Ioctl.
-func (a *Area) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
- // Switch on ioctl request.
- switch args[1].Uint() {
- case linux.AshmemSetNameIoctl:
- name, err := usermem.CopyStringIn(ctx, io, args[2].Pointer(), nameLen-1, usermem.IOOpts{
- AddressSpaceActive: true,
- })
- if err != nil {
- return 0, err
- }
-
- a.mu.Lock()
- defer a.mu.Unlock()
-
- // Cannot set name for already mapped ashmem.
- if a.tmpfsFile != nil {
- return 0, syserror.EINVAL
- }
- a.name = name
- return 0, nil
-
- case linux.AshmemGetNameIoctl:
- a.mu.Lock()
- var local []byte
- if a.name != "" {
- nameLen := len([]byte(a.name))
- local = make([]byte, nameLen, nameLen+1)
- copy(local, []byte(a.name))
- local = append(local, 0)
- } else {
- nameLen := len([]byte(namePrefix))
- local = make([]byte, nameLen, nameLen+1)
- copy(local, []byte(namePrefix))
- local = append(local, 0)
- }
- a.mu.Unlock()
-
- if _, err := io.CopyOut(ctx, args[2].Pointer(), local, usermem.IOOpts{
- AddressSpaceActive: true,
- }); err != nil {
- return 0, syserror.EFAULT
- }
- return 0, nil
-
- case linux.AshmemSetSizeIoctl:
- a.mu.Lock()
- defer a.mu.Unlock()
-
- // Cannot set size for already mapped ashmem.
- if a.tmpfsFile != nil {
- return 0, syserror.EINVAL
- }
- a.size = uint64(args[2].SizeT())
- return 0, nil
-
- case linux.AshmemGetSizeIoctl:
- return uintptr(a.size), nil
-
- case linux.AshmemPinIoctl, linux.AshmemUnpinIoctl, linux.AshmemGetPinStatusIoctl:
- // Locking and unlocking is ok since once tmpfsFile is set, it won't be nil again
- // even after unmapping! Unlocking is needed in order to avoid a deadlock on
- // usermem.CopyObjectIn.
-
- // Cannot execute pin-related ioctls before mapping.
- a.mu.Lock()
- if a.tmpfsFile == nil {
- a.mu.Unlock()
- return 0, syserror.EINVAL
- }
- a.mu.Unlock()
-
- var pin linux.AshmemPin
- _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &pin, usermem.IOOpts{
- AddressSpaceActive: true,
- })
- if err != nil {
- return 0, syserror.EFAULT
- }
-
- a.mu.Lock()
- defer a.mu.Unlock()
- return a.pinOperation(pin, args[1].Uint())
-
- case linux.AshmemPurgeAllCachesIoctl:
- return 0, nil
-
- case linux.AshmemSetProtMaskIoctl:
- prot := uint64(args[2].ModeT())
- perms := usermem.AccessType{
- Read: prot&linux.PROT_READ != 0,
- Write: prot&linux.PROT_WRITE != 0,
- Execute: prot&linux.PROT_EXEC != 0,
- }
-
- a.mu.Lock()
- defer a.mu.Unlock()
-
- // Can only narrow prot mask.
- if !a.perms.SupersetOf(perms) {
- return 0, syserror.EINVAL
- }
-
- // TODO(b/30946773,gvisor.dev/issue/153): If personality flag
- // READ_IMPLIES_EXEC is set, set PROT_EXEC if PORT_READ is set.
-
- a.perms = perms
- return 0, nil
-
- case linux.AshmemGetProtMaskIoctl:
- return uintptr(a.perms.Prot()), nil
- default:
- // Ioctls irrelevant to Ashmem.
- return 0, syserror.EINVAL
- }
-}
-
-// pinOperation should only be called while holding a.mu.
-func (a *Area) pinOperation(pin linux.AshmemPin, op uint32) (uintptr, error) {
- // Page-align a.size for checks.
- pageAlignedSize, ok := usermem.Addr(a.size).RoundUp()
- if !ok {
- return 0, syserror.EINVAL
- }
- // Len 0 means everything onward.
- if pin.Len == 0 {
- pin.Len = uint32(pageAlignedSize) - pin.Offset
- }
- // Both Offset and Len have to be page-aligned.
- if pin.Offset%uint32(usermem.PageSize) != 0 {
- return 0, syserror.EINVAL
- }
- if pin.Len%uint32(usermem.PageSize) != 0 {
- return 0, syserror.EINVAL
- }
- // Adding Offset and Len must not cause an uint32 overflow.
- if end := pin.Offset + pin.Len; end < pin.Offset {
- return 0, syserror.EINVAL
- }
- // Pin range must not exceed a's size.
- if uint32(pageAlignedSize) < pin.Offset+pin.Len {
- return 0, syserror.EINVAL
- }
- // Handle each operation.
- r := RangeFromAshmemPin(pin)
- switch op {
- case linux.AshmemPinIoctl:
- if a.pb.PinRange(r) {
- return linux.AshmemWasPurged, nil
- }
- return linux.AshmemNotPurged, nil
-
- case linux.AshmemUnpinIoctl:
- // TODO(b/30946773): Implement purge on unpin.
- a.pb.UnpinRange(r)
- return 0, nil
-
- case linux.AshmemGetPinStatusIoctl:
- if a.pb.RangePinnedStatus(r) {
- return linux.AshmemIsPinned, nil
- }
- return linux.AshmemIsUnpinned, nil
-
- default:
- panic("unreachable")
- }
-
-}
diff --git a/pkg/sentry/fs/ashmem/device.go b/pkg/sentry/fs/ashmem/device.go
deleted file mode 100644
index 22e1530e9..000000000
--- a/pkg/sentry/fs/ashmem/device.go
+++ /dev/null
@@ -1,61 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package ashmem implements Android ashmem module (Anonymus Shared Memory).
-package ashmem
-
-import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
-)
-
-// Device implements fs.InodeOperations.
-//
-// +stateify savable
-type Device struct {
- fsutil.InodeGenericChecker `state:"nosave"`
- fsutil.InodeNoExtendedAttributes `state:"nosave"`
- fsutil.InodeNoopAllocate `state:"nosave"`
- fsutil.InodeNoopRelease `state:"nosave"`
- fsutil.InodeNoopTruncate `state:"nosave"`
- fsutil.InodeNoopWriteOut `state:"nosave"`
- fsutil.InodeNotDirectory `state:"nosave"`
- fsutil.InodeNotMappable `state:"nosave"`
- fsutil.InodeNotSocket `state:"nosave"`
- fsutil.InodeNotSymlink `state:"nosave"`
- fsutil.InodeVirtual `state:"nosave"`
-
- fsutil.InodeSimpleAttributes
-}
-
-var _ fs.InodeOperations = (*Device)(nil)
-
-// NewDevice creates and intializes a Device structure.
-func NewDevice(ctx context.Context, owner fs.FileOwner, fp fs.FilePermissions) *Device {
- return &Device{
- InodeSimpleAttributes: fsutil.NewInodeSimpleAttributes(ctx, owner, fp, linux.ANON_INODE_FS_MAGIC),
- }
-}
-
-// GetFile implements fs.InodeOperations.GetFile.
-func (ad *Device) GetFile(ctx context.Context, d *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
- return fs.NewFile(ctx, d, flags, &Area{
- ad: ad,
- tmpfsFile: nil,
- perms: usermem.AnyAccess,
- }), nil
-}
diff --git a/pkg/sentry/fs/ashmem/pin_board.go b/pkg/sentry/fs/ashmem/pin_board.go
deleted file mode 100644
index bdf23b371..000000000
--- a/pkg/sentry/fs/ashmem/pin_board.go
+++ /dev/null
@@ -1,127 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package ashmem
-
-import "gvisor.googlesource.com/gvisor/pkg/abi/linux"
-
-const maxUint64 = ^uint64(0)
-
-// setFunctions implements segment.Functions generated from segment.Functions for
-// uint64 Key and noValue Value. For more information, see the build file and
-// segment set implementation at pkg/segment/set.go.
-type setFunctions struct{}
-
-// noValue is a type of range attached value, which is irrelevant here.
-type noValue struct{}
-
-// MinKey implements segment.Functions.MinKey.
-func (setFunctions) MinKey() uint64 {
- return 0
-}
-
-// MaxKey implements segment.Functions.MaxKey.
-func (setFunctions) MaxKey() uint64 {
- return maxUint64
-}
-
-// ClearValue implements segment.Functions.ClearValue.
-func (setFunctions) ClearValue(*noValue) {
- return
-}
-
-// Merge implements segment.Functions.Merge.
-func (setFunctions) Merge(Range, noValue, Range, noValue) (noValue, bool) {
- return noValue{}, true
-}
-
-// Split implements segment.Functions.Split.
-func (setFunctions) Split(Range, noValue, uint64) (noValue, noValue) {
- return noValue{}, noValue{}
-}
-
-// PinBoard represents a set of pinned ranges in ashmem.
-//
-// segment.Set is used for implementation where segments represent
-// ranges of pinned bytes, while gaps represent ranges of unpinned
-// bytes. All ranges are page-aligned.
-//
-// +stateify savable
-type PinBoard struct {
- Set
-}
-
-// NewPinBoard creates a new pin board with all pages pinned.
-func NewPinBoard() *PinBoard {
- var pb PinBoard
- pb.PinRange(Range{0, maxUint64})
- return &pb
-}
-
-// PinRange pins all pages in the specified range and returns true
-// if there are any newly pinned pages.
-func (pb *PinBoard) PinRange(r Range) bool {
- pinnedPages := false
- for gap := pb.LowerBoundGap(r.Start); gap.Ok() && gap.Start() < r.End; {
- common := gap.Range().Intersect(r)
- if common.Length() == 0 {
- gap = gap.NextGap()
- continue
- }
- pinnedPages = true
- gap = pb.Insert(gap, common, noValue{}).NextGap()
- }
- return pinnedPages
-}
-
-// UnpinRange unpins all pages in the specified range.
-func (pb *PinBoard) UnpinRange(r Range) {
- for seg := pb.LowerBoundSegment(r.Start); seg.Ok() && seg.Start() < r.End; {
- common := seg.Range().Intersect(r)
- if common.Length() == 0 {
- seg = seg.NextSegment()
- continue
- }
- seg = pb.RemoveRange(common).NextSegment()
- }
-}
-
-// RangePinnedStatus returns false if there's at least one unpinned page in the
-// specified range.
-func (pb *PinBoard) RangePinnedStatus(r Range) bool {
- for gap := pb.LowerBoundGap(r.Start); gap.Ok() && gap.Start() < r.End; {
- common := gap.Range().Intersect(r)
- if common.Length() == 0 {
- gap = gap.NextGap()
- continue
- }
- return false
- }
- return true
-}
-
-// RangeFromAshmemPin converts ashmem's original pin structure
-// to Range.
-func RangeFromAshmemPin(ap linux.AshmemPin) Range {
- if ap.Len == 0 {
- return Range{
- uint64(ap.Offset),
- maxUint64,
- }
- }
- return Range{
- uint64(ap.Offset),
- uint64(ap.Offset) + uint64(ap.Len),
- }
-}
diff --git a/pkg/sentry/fs/ashmem/pin_board_test.go b/pkg/sentry/fs/ashmem/pin_board_test.go
deleted file mode 100644
index 24f5d86d6..000000000
--- a/pkg/sentry/fs/ashmem/pin_board_test.go
+++ /dev/null
@@ -1,130 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package ashmem
-
-import (
- "testing"
-
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
-)
-
-func TestPinBoard(t *testing.T) {
- pb := NewPinBoard()
-
- // Confirm that all pages are pinned.
- if !pb.RangePinnedStatus(RangeFromAshmemPin(linux.AshmemPin{0, 0})) {
- t.Errorf("RangePinnedStatus(all pages) returned false (unpinned) at start.")
- }
-
- // Unpin pages [1, 11) (counting from 0)
- pb.UnpinRange(RangeFromAshmemPin(linux.AshmemPin{
- usermem.PageSize,
- usermem.PageSize * 10,
- }))
-
- // Confirm that pages [1, 11) are unpinned and that page 0 and pages
- // larger than 10 are pinned.
- pinned := []linux.AshmemPin{
- {
- 0,
- usermem.PageSize,
- }, {
- usermem.PageSize * 11,
- 0,
- },
- }
-
- for _, pin := range pinned {
- if !pb.RangePinnedStatus(RangeFromAshmemPin(pin)) {
- t.Errorf("RangePinnedStatus(AshmemPin{offset (pages): %v, len (pages): %v}) returned false (unpinned).",
- pin.Offset, pin.Len)
- }
- }
-
- unpinned := []linux.AshmemPin{
- {
- usermem.PageSize,
- usermem.PageSize * 10,
- },
- }
-
- for _, pin := range unpinned {
- if pb.RangePinnedStatus(RangeFromAshmemPin(pin)) {
- t.Errorf("RangePinnedStatus(AshmemPin{offset (pages): %v, len (pages): %v}) returned true (pinned).",
- pin.Offset, pin.Len)
- }
- }
-
- // Pin pages [2, 6).
- pb.PinRange(RangeFromAshmemPin(linux.AshmemPin{
- usermem.PageSize * 2,
- usermem.PageSize * 4,
- }))
-
- // Confirm that pages 0, [2, 6) and pages larger than 10 are pinned
- // while others remain unpinned.
- pinned = []linux.AshmemPin{
- {
- 0,
- usermem.PageSize,
- },
- {
- usermem.PageSize * 2,
- usermem.PageSize * 4,
- },
- {
- usermem.PageSize * 11,
- 0,
- },
- }
-
- for _, pin := range pinned {
- if !pb.RangePinnedStatus(RangeFromAshmemPin(pin)) {
- t.Errorf("RangePinnedStatus(AshmemPin{offset (pages): %v, len (pages): %v}) returned false (unpinned).",
- pin.Offset, pin.Len)
- }
- }
-
- unpinned = []linux.AshmemPin{
- {
- usermem.PageSize,
- usermem.PageSize,
- }, {
- usermem.PageSize * 6,
- usermem.PageSize * 5,
- },
- }
-
- for _, pin := range unpinned {
- if pb.RangePinnedStatus(RangeFromAshmemPin(pin)) {
- t.Errorf("RangePinnedStatus(AshmemPin{offset (pages): %v, len (pages): %v}) returned true (pinned).",
- pin.Offset, pin.Len)
- }
- }
-
- // Status of a partially pinned range is unpinned.
- if pb.RangePinnedStatus(RangeFromAshmemPin(linux.AshmemPin{0, 0})) {
- t.Errorf("RangePinnedStatus(all pages) returned true (pinned).")
- }
-
- // Pin the whole range again.
- pb.PinRange(RangeFromAshmemPin(linux.AshmemPin{0, 0}))
-
- // Confirm that all pages are pinned.
- if !pb.RangePinnedStatus(RangeFromAshmemPin(linux.AshmemPin{0, 0})) {
- t.Errorf("RangePinnedStatus(all pages) returned false (unpinned) at start.")
- }
-}
diff --git a/pkg/sentry/fs/attr.go b/pkg/sentry/fs/attr.go
index 591e35e6a..9fc6a5bc2 100644
--- a/pkg/sentry/fs/attr.go
+++ b/pkg/sentry/fs/attr.go
@@ -19,11 +19,11 @@ import (
"os"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/p9"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/p9"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
)
// InodeType enumerates types of Inodes.
@@ -89,6 +89,28 @@ func (n InodeType) String() string {
}
}
+// LinuxType returns the linux file type for this inode type.
+func (n InodeType) LinuxType() uint32 {
+ switch n {
+ case RegularFile, SpecialFile:
+ return linux.ModeRegular
+ case Directory, SpecialDirectory:
+ return linux.ModeDirectory
+ case Symlink:
+ return linux.ModeSymlink
+ case Pipe:
+ return linux.ModeNamedPipe
+ case CharacterDevice:
+ return linux.ModeCharacterDevice
+ case BlockDevice:
+ return linux.ModeBlockDevice
+ case Socket:
+ return linux.ModeSocket
+ default:
+ return 0
+ }
+}
+
// StableAttr contains Inode attributes that will be stable throughout the
// lifetime of the Inode.
//
diff --git a/pkg/sentry/fs/binder/BUILD b/pkg/sentry/fs/binder/BUILD
deleted file mode 100644
index 3710664d3..000000000
--- a/pkg/sentry/fs/binder/BUILD
+++ /dev/null
@@ -1,27 +0,0 @@
-package(licenses = ["notice"])
-
-load("//tools/go_stateify:defs.bzl", "go_library")
-
-go_library(
- name = "binder",
- srcs = [
- "binder.go",
- ],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs/binder",
- visibility = ["//pkg/sentry:internal"],
- deps = [
- "//pkg/abi/linux",
- "//pkg/sentry/arch",
- "//pkg/sentry/context",
- "//pkg/sentry/fs",
- "//pkg/sentry/fs/fsutil",
- "//pkg/sentry/kernel",
- "//pkg/sentry/memmap",
- "//pkg/sentry/pgalloc",
- "//pkg/sentry/platform",
- "//pkg/sentry/usage",
- "//pkg/sentry/usermem",
- "//pkg/syserror",
- "//pkg/waiter",
- ],
-)
diff --git a/pkg/sentry/fs/binder/binder.go b/pkg/sentry/fs/binder/binder.go
deleted file mode 100644
index c78f1fc40..000000000
--- a/pkg/sentry/fs/binder/binder.go
+++ /dev/null
@@ -1,260 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package binder implements Android Binder IPC module.
-package binder
-
-import (
- "sync"
-
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
-)
-
-const (
- currentProtocolVersion = 8
-
- // mmapSizeLimit is the upper limit for mapped memory size in Binder.
- mmapSizeLimit = 4 * 1024 * 1024 // 4MB
-)
-
-// Device implements fs.InodeOperations.
-//
-// +stateify savable
-type Device struct {
- fsutil.InodeGenericChecker `state:"nosave"`
- fsutil.InodeNoExtendedAttributes `state:"nosave"`
- fsutil.InodeNoopAllocate `state:"nosave"`
- fsutil.InodeNoopRelease `state:"nosave"`
- fsutil.InodeNoopTruncate `state:"nosave"`
- fsutil.InodeNoopWriteOut `state:"nosave"`
- fsutil.InodeNotDirectory `state:"nosave"`
- fsutil.InodeNotMappable `state:"nosave"`
- fsutil.InodeNotSocket `state:"nosave"`
- fsutil.InodeNotSymlink `state:"nosave"`
- fsutil.InodeVirtual `state:"nosave"`
-
- fsutil.InodeSimpleAttributes
-}
-
-var _ fs.InodeOperations = (*Device)(nil)
-
-// NewDevice creates and intializes a Device structure.
-func NewDevice(ctx context.Context, owner fs.FileOwner, fp fs.FilePermissions) *Device {
- return &Device{
- InodeSimpleAttributes: fsutil.NewInodeSimpleAttributes(ctx, owner, fp, 0),
- }
-}
-
-// GetFile implements fs.InodeOperations.GetFile.
-//
-// TODO(b/30946773): Add functionality to GetFile: Additional fields will be
-// needed in the Device structure, initialize them here. Also, Device will need
-// to keep track of the created Procs in order to implement BINDER_READ_WRITE
-// ioctl.
-func (bd *Device) GetFile(ctx context.Context, d *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
- return fs.NewFile(ctx, d, flags, &Proc{
- bd: bd,
- task: kernel.TaskFromContext(ctx),
- mfp: pgalloc.MemoryFileProviderFromContext(ctx),
- }), nil
-}
-
-// Proc implements fs.FileOperations and fs.IoctlGetter.
-//
-// +stateify savable
-type Proc struct {
- fsutil.FileNoFsync `state:"nosave"`
- fsutil.FileNoSplice `state:"nosave"`
- fsutil.FileNotDirReaddir `state:"nosave"`
- fsutil.FileUseInodeUnstableAttr `state:"nosave"`
- waiter.AlwaysReady `state:"nosave"`
-
- bd *Device
- task *kernel.Task
- mfp pgalloc.MemoryFileProvider
-
- // mu protects fr.
- mu sync.Mutex `state:"nosave"`
-
- // mapped is memory allocated from mfp.MemoryFile() by AddMapping.
- mapped platform.FileRange
-}
-
-// Release implements fs.FileOperations.Release.
-func (bp *Proc) Release() {
- bp.mu.Lock()
- defer bp.mu.Unlock()
- if bp.mapped.Length() != 0 {
- bp.mfp.MemoryFile().DecRef(bp.mapped)
- }
-}
-
-// Seek implements fs.FileOperations.Seek.
-//
-// Binder doesn't support seek operation (unless in debug mode).
-func (bp *Proc) Seek(ctx context.Context, file *fs.File, whence fs.SeekWhence, offset int64) (int64, error) {
- return offset, syserror.EOPNOTSUPP
-}
-
-// Read implements fs.FileOperations.Read.
-//
-// Binder doesn't support read operation (unless in debug mode).
-func (bp *Proc) Read(ctx context.Context, file *fs.File, dst usermem.IOSequence, offset int64) (int64, error) {
- return 0, syserror.EOPNOTSUPP
-}
-
-// Write implements fs.FileOperations.Write.
-//
-// Binder doesn't support write operation.
-func (bp *Proc) Write(ctx context.Context, file *fs.File, src usermem.IOSequence, offset int64) (int64, error) {
- return 0, syserror.EOPNOTSUPP
-}
-
-// Flush implements fs.FileOperations.Flush.
-//
-// TODO(b/30946773): Implement.
-func (bp *Proc) Flush(ctx context.Context, file *fs.File) error {
- return nil
-}
-
-// ConfigureMMap implements fs.FileOperations.ConfigureMMap.
-func (bp *Proc) ConfigureMMap(ctx context.Context, file *fs.File, opts *memmap.MMapOpts) error {
- // Compare drivers/android/binder.c:binder_mmap().
- if caller := kernel.TaskFromContext(ctx); caller != bp.task {
- return syserror.EINVAL
- }
- if opts.Length > mmapSizeLimit {
- opts.Length = mmapSizeLimit
- }
- opts.MaxPerms.Write = false
-
- // TODO(b/30946773): Binder sets VM_DONTCOPY, preventing the created vma
- // from being copied across fork(), but we don't support this yet. As
- // a result, MMs containing a Binder mapping cannot be forked (MM.Fork will
- // fail when AddMapping returns EBUSY).
-
- return fsutil.GenericConfigureMMap(file, bp, opts)
-}
-
-// Ioctl implements fs.FileOperations.Ioctl.
-//
-// TODO(b/30946773): Implement.
-func (bp *Proc) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
- // Switch on ioctl request.
- switch uint32(args[1].Int()) {
- case linux.BinderVersionIoctl:
- ver := &linux.BinderVersion{
- ProtocolVersion: currentProtocolVersion,
- }
- // Copy result to user-space.
- _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), ver, usermem.IOOpts{
- AddressSpaceActive: true,
- })
- return 0, err
- case linux.BinderWriteReadIoctl:
- // TODO(b/30946773): Implement.
- fallthrough
- case linux.BinderSetIdleTimeoutIoctl:
- // TODO(b/30946773): Implement.
- fallthrough
- case linux.BinderSetMaxThreadsIoctl:
- // TODO(b/30946773): Implement.
- fallthrough
- case linux.BinderSetIdlePriorityIoctl:
- // TODO(b/30946773): Implement.
- fallthrough
- case linux.BinderSetContextMgrIoctl:
- // TODO(b/30946773): Implement.
- fallthrough
- case linux.BinderThreadExitIoctl:
- // TODO(b/30946773): Implement.
- return 0, syserror.ENOSYS
- default:
- // Ioctls irrelevant to Binder.
- return 0, syserror.EINVAL
- }
-}
-
-// AddMapping implements memmap.Mappable.AddMapping.
-func (bp *Proc) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, _ bool) error {
- bp.mu.Lock()
- defer bp.mu.Unlock()
- if bp.mapped.Length() != 0 {
- // mmap has been called before, which binder_mmap() doesn't like.
- return syserror.EBUSY
- }
- // Binder only allocates and maps a single page up-front
- // (drivers/android/binder.c:binder_mmap() => binder_update_page_range()).
- fr, err := bp.mfp.MemoryFile().Allocate(usermem.PageSize, usage.Anonymous)
- if err != nil {
- return err
- }
- bp.mapped = fr
- return nil
-}
-
-// RemoveMapping implements memmap.Mappable.RemoveMapping.
-func (*Proc) RemoveMapping(context.Context, memmap.MappingSpace, usermem.AddrRange, uint64, bool) {
- // Nothing to do. Notably, we don't free bp.mapped to allow another mmap.
-}
-
-// CopyMapping implements memmap.Mappable.CopyMapping.
-func (bp *Proc) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR usermem.AddrRange, offset uint64, _ bool) error {
- // Nothing to do. Notably, this is one case where CopyMapping isn't
- // equivalent to AddMapping, as AddMapping would return EBUSY.
- return nil
-}
-
-// Translate implements memmap.Mappable.Translate.
-func (bp *Proc) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) {
- // TODO(b/30946773): In addition to the page initially allocated and mapped
- // in AddMapping (Linux: binder_mmap), Binder allocates and maps pages for
- // each transaction (Linux: binder_ioctl => binder_ioctl_write_read =>
- // binder_thread_write => binder_transaction => binder_alloc_buf =>
- // binder_update_page_range). Since we don't actually implement
- // BinderWriteReadIoctl (Linux: BINDER_WRITE_READ), we only ever have the
- // first page.
- var err error
- if required.End > usermem.PageSize {
- err = &memmap.BusError{syserror.EFAULT}
- }
- if required.Start == 0 {
- return []memmap.Translation{
- {
- Source: memmap.MappableRange{0, usermem.PageSize},
- File: bp.mfp.MemoryFile(),
- Offset: bp.mapped.Start,
- Perms: usermem.AnyAccess,
- },
- }, err
- }
- return nil, err
-}
-
-// InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable.
-func (bp *Proc) InvalidateUnsavable(ctx context.Context) error {
- return nil
-}
diff --git a/pkg/sentry/fs/context.go b/pkg/sentry/fs/context.go
index c80ea0175..51b4c7ee1 100644
--- a/pkg/sentry/fs/context.go
+++ b/pkg/sentry/fs/context.go
@@ -15,9 +15,9 @@
package fs
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
)
// contextID is the fs package's type for context.Context.Value keys.
diff --git a/pkg/sentry/fs/copy_up.go b/pkg/sentry/fs/copy_up.go
index 41265704c..9ac62c84d 100644
--- a/pkg/sentry/fs/copy_up.go
+++ b/pkg/sentry/fs/copy_up.go
@@ -19,11 +19,11 @@ import (
"io"
"sync"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// copyUp copies a file in an overlay from a lower filesystem to an
diff --git a/pkg/sentry/fs/copy_up_test.go b/pkg/sentry/fs/copy_up_test.go
index 54810afca..1d80bf15a 100644
--- a/pkg/sentry/fs/copy_up_test.go
+++ b/pkg/sentry/fs/copy_up_test.go
@@ -22,10 +22,10 @@ import (
"sync"
"testing"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- _ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/tmpfs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/contexttest"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ _ "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
const (
@@ -102,7 +102,7 @@ func makeOverlayTestFiles(t *testing.T) []*overlayTestFile {
if err != nil {
t.Fatalf("failed to mount tmpfs: %v", err)
}
- lowerRoot := fs.NewDirent(lower, "")
+ lowerRoot := fs.NewDirent(ctx, lower, "")
// Make a deep set of subdirectories that everyone shares.
next := lowerRoot
diff --git a/pkg/sentry/fs/dentry.go b/pkg/sentry/fs/dentry.go
index 7a2d4b180..6b2699f15 100644
--- a/pkg/sentry/fs/dentry.go
+++ b/pkg/sentry/fs/dentry.go
@@ -17,7 +17,7 @@ package fs
import (
"sort"
- "gvisor.googlesource.com/gvisor/pkg/sentry/device"
+ "gvisor.dev/gvisor/pkg/sentry/device"
)
// DentAttr is the metadata of a directory entry. It is a subset of StableAttr.
diff --git a/pkg/sentry/fs/dev/BUILD b/pkg/sentry/fs/dev/BUILD
index 6c4fdaba9..59de615fb 100644
--- a/pkg/sentry/fs/dev/BUILD
+++ b/pkg/sentry/fs/dev/BUILD
@@ -12,7 +12,7 @@ go_library(
"null.go",
"random.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs/dev",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/fs/dev",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
@@ -20,8 +20,6 @@ go_library(
"//pkg/sentry/context",
"//pkg/sentry/device",
"//pkg/sentry/fs",
- "//pkg/sentry/fs/ashmem",
- "//pkg/sentry/fs/binder",
"//pkg/sentry/fs/fsutil",
"//pkg/sentry/fs/ramfs",
"//pkg/sentry/fs/tmpfs",
diff --git a/pkg/sentry/fs/dev/dev.go b/pkg/sentry/fs/dev/dev.go
index 34ac01173..d4bbd9807 100644
--- a/pkg/sentry/fs/dev/dev.go
+++ b/pkg/sentry/fs/dev/dev.go
@@ -18,13 +18,11 @@ package dev
import (
"math"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ashmem"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/binder"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/tmpfs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/ramfs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// Memory device numbers are from Linux's drivers/char/mem.c
@@ -40,8 +38,8 @@ const (
urandomDevMinor uint32 = 9
)
-func newCharacterDevice(iops fs.InodeOperations, msrc *fs.MountSource) *fs.Inode {
- return fs.NewInode(iops, msrc, fs.StableAttr{
+func newCharacterDevice(ctx context.Context, iops fs.InodeOperations, msrc *fs.MountSource) *fs.Inode {
+ return fs.NewInode(ctx, iops, msrc, fs.StableAttr{
DeviceID: devDevice.DeviceID(),
InodeID: devDevice.NextIno(),
BlockSize: usermem.PageSize,
@@ -49,8 +47,8 @@ func newCharacterDevice(iops fs.InodeOperations, msrc *fs.MountSource) *fs.Inode
})
}
-func newMemDevice(iops fs.InodeOperations, msrc *fs.MountSource, minor uint32) *fs.Inode {
- return fs.NewInode(iops, msrc, fs.StableAttr{
+func newMemDevice(ctx context.Context, iops fs.InodeOperations, msrc *fs.MountSource, minor uint32) *fs.Inode {
+ return fs.NewInode(ctx, iops, msrc, fs.StableAttr{
DeviceID: devDevice.DeviceID(),
InodeID: devDevice.NextIno(),
BlockSize: usermem.PageSize,
@@ -62,7 +60,7 @@ func newMemDevice(iops fs.InodeOperations, msrc *fs.MountSource, minor uint32) *
func newDirectory(ctx context.Context, msrc *fs.MountSource) *fs.Inode {
iops := ramfs.NewDir(ctx, nil, fs.RootOwner, fs.FilePermsFromMode(0555))
- return fs.NewInode(iops, msrc, fs.StableAttr{
+ return fs.NewInode(ctx, iops, msrc, fs.StableAttr{
DeviceID: devDevice.DeviceID(),
InodeID: devDevice.NextIno(),
BlockSize: usermem.PageSize,
@@ -72,7 +70,7 @@ func newDirectory(ctx context.Context, msrc *fs.MountSource) *fs.Inode {
func newSymlink(ctx context.Context, target string, msrc *fs.MountSource) *fs.Inode {
iops := ramfs.NewSymlink(ctx, fs.RootOwner, target)
- return fs.NewInode(iops, msrc, fs.StableAttr{
+ return fs.NewInode(ctx, iops, msrc, fs.StableAttr{
DeviceID: devDevice.DeviceID(),
InodeID: devDevice.NextIno(),
BlockSize: usermem.PageSize,
@@ -81,24 +79,24 @@ func newSymlink(ctx context.Context, target string, msrc *fs.MountSource) *fs.In
}
// New returns the root node of a device filesystem.
-func New(ctx context.Context, msrc *fs.MountSource, binderEnabled bool, ashmemEnabled bool) *fs.Inode {
+func New(ctx context.Context, msrc *fs.MountSource) *fs.Inode {
contents := map[string]*fs.Inode{
"fd": newSymlink(ctx, "/proc/self/fd", msrc),
"stdin": newSymlink(ctx, "/proc/self/fd/0", msrc),
"stdout": newSymlink(ctx, "/proc/self/fd/1", msrc),
"stderr": newSymlink(ctx, "/proc/self/fd/2", msrc),
- "null": newMemDevice(newNullDevice(ctx, fs.RootOwner, 0666), msrc, nullDevMinor),
- "zero": newMemDevice(newZeroDevice(ctx, fs.RootOwner, 0666), msrc, zeroDevMinor),
- "full": newMemDevice(newFullDevice(ctx, fs.RootOwner, 0666), msrc, fullDevMinor),
+ "null": newMemDevice(ctx, newNullDevice(ctx, fs.RootOwner, 0666), msrc, nullDevMinor),
+ "zero": newMemDevice(ctx, newZeroDevice(ctx, fs.RootOwner, 0666), msrc, zeroDevMinor),
+ "full": newMemDevice(ctx, newFullDevice(ctx, fs.RootOwner, 0666), msrc, fullDevMinor),
// This is not as good as /dev/random in linux because go
// runtime uses sys_random and /dev/urandom internally.
// According to 'man 4 random', this will be sufficient unless
// application uses this to generate long-lived GPG/SSL/SSH
// keys.
- "random": newMemDevice(newRandomDevice(ctx, fs.RootOwner, 0444), msrc, randomDevMinor),
- "urandom": newMemDevice(newRandomDevice(ctx, fs.RootOwner, 0444), msrc, urandomDevMinor),
+ "random": newMemDevice(ctx, newRandomDevice(ctx, fs.RootOwner, 0444), msrc, randomDevMinor),
+ "urandom": newMemDevice(ctx, newRandomDevice(ctx, fs.RootOwner, 0444), msrc, urandomDevMinor),
"shm": tmpfs.NewDir(ctx, nil, fs.RootOwner, fs.FilePermsFromMode(0777), msrc),
@@ -118,18 +116,8 @@ func New(ctx context.Context, msrc *fs.MountSource, binderEnabled bool, ashmemEn
"ptmx": newSymlink(ctx, "pts/ptmx", msrc),
}
- if binderEnabled {
- binder := binder.NewDevice(ctx, fs.RootOwner, fs.FilePermsFromMode(0666))
- contents["binder"] = newCharacterDevice(binder, msrc)
- }
-
- if ashmemEnabled {
- ashmem := ashmem.NewDevice(ctx, fs.RootOwner, fs.FilePermsFromMode(0666))
- contents["ashmem"] = newCharacterDevice(ashmem, msrc)
- }
-
iops := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555))
- return fs.NewInode(iops, msrc, fs.StableAttr{
+ return fs.NewInode(ctx, iops, msrc, fs.StableAttr{
DeviceID: devDevice.DeviceID(),
InodeID: devDevice.NextIno(),
BlockSize: usermem.PageSize,
diff --git a/pkg/sentry/fs/dev/device.go b/pkg/sentry/fs/dev/device.go
index 9f4e41fc9..a0493474e 100644
--- a/pkg/sentry/fs/dev/device.go
+++ b/pkg/sentry/fs/dev/device.go
@@ -14,7 +14,7 @@
package dev
-import "gvisor.googlesource.com/gvisor/pkg/sentry/device"
+import "gvisor.dev/gvisor/pkg/sentry/device"
// devDevice is the pseudo-filesystem device.
var devDevice = device.NewAnonDevice()
diff --git a/pkg/sentry/fs/dev/fs.go b/pkg/sentry/fs/dev/fs.go
index 6096a40f8..55f8af704 100644
--- a/pkg/sentry/fs/dev/fs.go
+++ b/pkg/sentry/fs/dev/fs.go
@@ -15,19 +15,10 @@
package dev
import (
- "strconv"
-
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
)
-// Optional key containing boolean flag which specifies if Android Binder IPC should be enabled.
-const binderEnabledKey = "binder_enabled"
-
-// Optional key containing boolean flag which specifies if Android ashmem should be enabled.
-const ashmemEnabledKey = "ashmem_enabled"
-
// filesystem is a devtmpfs.
//
// +stateify savable
@@ -39,7 +30,7 @@ func init() {
fs.RegisterFilesystem(&filesystem{})
}
-// FilesystemName is the name underwhich the filesystem is registered.
+// FilesystemName is the name under which the filesystem is registered.
// Name matches drivers/base/devtmpfs.c:dev_fs_type.name.
const FilesystemName = "devtmpfs"
@@ -67,33 +58,7 @@ func (*filesystem) Flags() fs.FilesystemFlags {
// Mount returns a devtmpfs root that can be positioned in the vfs.
func (f *filesystem) Mount(ctx context.Context, device string, flags fs.MountSourceFlags, data string, _ interface{}) (*fs.Inode, error) {
- // device is always ignored.
// devtmpfs backed by ramfs ignores bad options. See fs/ramfs/inode.c:ramfs_parse_options.
// -> we should consider parsing the mode and backing devtmpfs by this.
-
- // Parse generic comma-separated key=value options.
- options := fs.GenericMountSourceOptions(data)
-
- // binerEnabledKey is optional and binder is disabled by default.
- binderEnabled := false
- if beStr, exists := options[binderEnabledKey]; exists {
- var err error
- binderEnabled, err = strconv.ParseBool(beStr)
- if err != nil {
- return nil, syserror.EINVAL
- }
- }
-
- // ashmemEnabledKey is optional and ashmem is disabled by default.
- ashmemEnabled := false
- if aeStr, exists := options[ashmemEnabledKey]; exists {
- var err error
- ashmemEnabled, err = strconv.ParseBool(aeStr)
- if err != nil {
- return nil, syserror.EINVAL
- }
- }
-
- // Construct the devtmpfs root.
- return New(ctx, fs.NewNonCachingMountSource(f, flags), binderEnabled, ashmemEnabled), nil
+ return New(ctx, fs.NewNonCachingMountSource(ctx, f, flags)), nil
}
diff --git a/pkg/sentry/fs/dev/full.go b/pkg/sentry/fs/dev/full.go
index 8f6c6da2d..07e0ea010 100644
--- a/pkg/sentry/fs/dev/full.go
+++ b/pkg/sentry/fs/dev/full.go
@@ -15,13 +15,13 @@
package dev
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// fullDevice is used to implement /dev/full.
diff --git a/pkg/sentry/fs/dev/null.go b/pkg/sentry/fs/dev/null.go
index 3f1accef8..4404b97ef 100644
--- a/pkg/sentry/fs/dev/null.go
+++ b/pkg/sentry/fs/dev/null.go
@@ -15,14 +15,14 @@
package dev
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/mm"
- "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/mm"
+ "gvisor.dev/gvisor/pkg/sentry/pgalloc"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// +stateify savable
@@ -97,6 +97,7 @@ func newZeroDevice(ctx context.Context, owner fs.FileOwner, mode linux.FileMode)
func (zd *zeroDevice) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
flags.Pread = true
flags.Pwrite = true
+ flags.NonSeekable = true
return fs.NewFile(ctx, dirent, flags, &zeroFileOperations{}), nil
}
diff --git a/pkg/sentry/fs/dev/random.go b/pkg/sentry/fs/dev/random.go
index e5a01a906..49cb92f6e 100644
--- a/pkg/sentry/fs/dev/random.go
+++ b/pkg/sentry/fs/dev/random.go
@@ -15,14 +15,14 @@
package dev
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/rand"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/rand"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/safemem"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// +stateify savable
diff --git a/pkg/sentry/fs/dirent.go b/pkg/sentry/fs/dirent.go
index a0a35c242..fbca06761 100644
--- a/pkg/sentry/fs/dirent.go
+++ b/pkg/sentry/fs/dirent.go
@@ -22,13 +22,13 @@ import (
"sync/atomic"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/refs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport"
- "gvisor.googlesource.com/gvisor/pkg/sentry/uniqueid"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/refs"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
+ "gvisor.dev/gvisor/pkg/sentry/uniqueid"
+ "gvisor.dev/gvisor/pkg/syserror"
)
type globalDirentMap struct {
@@ -206,7 +206,7 @@ type Dirent struct {
// NewDirent returns a new root Dirent, taking the caller's reference on inode. The caller
// holds the only reference to the Dirent. Parents may call hashChild to parent this Dirent.
-func NewDirent(inode *Inode, name string) *Dirent {
+func NewDirent(ctx context.Context, inode *Inode, name string) *Dirent {
d := newDirent(inode, name)
allDirents.add(d)
d.userVisible = true
@@ -229,11 +229,13 @@ func newDirent(inode *Inode, name string) *Dirent {
if inode != nil {
inode.MountSource.IncDirentRefs()
}
- return &Dirent{
+ d := Dirent{
Inode: inode,
name: name,
children: make(map[string]*refs.WeakRef),
}
+ d.EnableLeakCheck("fs.Dirent")
+ return &d
}
// NewNegativeDirent returns a new root negative Dirent. Otherwise same as NewDirent.
@@ -918,7 +920,7 @@ type DirIterator interface {
// calls, and must start with the given offset.
//
// The caller must ensure that this operation is permitted.
- IterateDir(ctx context.Context, dirCtx *DirCtx, offset int) (int, error)
+ IterateDir(ctx context.Context, d *Dirent, dirCtx *DirCtx, offset int) (int, error)
}
// DirentReaddir serializes the directory entries of d including "." and "..".
@@ -948,9 +950,6 @@ func direntReaddir(ctx context.Context, d *Dirent, it DirIterator, root *Dirent,
if dirCtx.Serializer == nil {
panic("Dirent.Readdir: serializer must not be nil")
}
- if d.frozen {
- return d.readdirFrozen(root, offset, dirCtx)
- }
// Check that this is actually a directory before emitting anything.
// Once we have written entries for "." and "..", future errors from
@@ -959,6 +958,16 @@ func direntReaddir(ctx context.Context, d *Dirent, it DirIterator, root *Dirent,
return 0, syserror.ENOTDIR
}
+ // This is a special case for lseek(fd, 0, SEEK_END).
+ // See SeekWithDirCursor for more details.
+ if offset == FileMaxOffset {
+ return offset, nil
+ }
+
+ if d.frozen {
+ return d.readdirFrozen(root, offset, dirCtx)
+ }
+
// Collect attrs for "." and "..".
dot, dotdot := d.GetDotAttrs(root)
@@ -981,7 +990,7 @@ func direntReaddir(ctx context.Context, d *Dirent, it DirIterator, root *Dirent,
// it.IterateDir should be passed an offset that does not include the
// initial dot elements. We will add them back later.
offset -= 2
- newOffset, err := it.IterateDir(ctx, dirCtx, int(offset))
+ newOffset, err := it.IterateDir(ctx, d, dirCtx, int(offset))
if int64(newOffset) < offset {
panic(fmt.Sprintf("node.Readdir returned offset %v less than input offset %v", newOffset, offset))
}
@@ -1068,7 +1077,7 @@ func (d *Dirent) mount(ctx context.Context, inode *Inode) (newChild *Dirent, err
//
// Note that NewDirent returns with one reference taken; the reference
// is donated to the caller as the mount reference.
- replacement := NewDirent(inode, d.name)
+ replacement := NewDirent(ctx, inode, d.name)
replacement.mounted = true
weakRef, ok := d.parent.hashChild(replacement)
diff --git a/pkg/sentry/fs/dirent_cache.go b/pkg/sentry/fs/dirent_cache.go
index 71f2d11de..60a15a275 100644
--- a/pkg/sentry/fs/dirent_cache.go
+++ b/pkg/sentry/fs/dirent_cache.go
@@ -146,7 +146,7 @@ func (c *DirentCache) contains(d *Dirent) bool {
return c.list.Front() == d
}
-// Invalidate removes all Dirents from the cache, caling DecRef on each.
+// Invalidate removes all Dirents from the cache, calling DecRef on each.
func (c *DirentCache) Invalidate() {
if c == nil {
return
@@ -159,7 +159,7 @@ func (c *DirentCache) Invalidate() {
}
// setMaxSize sets cache max size. If current size is larger than max size, the
-// cache shrinks to acommodate the new max.
+// cache shrinks to accommodate the new max.
func (c *DirentCache) setMaxSize(max uint64) {
c.mu.Lock()
c.maxSize = max
diff --git a/pkg/sentry/fs/dirent_refs_test.go b/pkg/sentry/fs/dirent_refs_test.go
index db88d850e..884e3ff06 100644
--- a/pkg/sentry/fs/dirent_refs_test.go
+++ b/pkg/sentry/fs/dirent_refs_test.go
@@ -18,8 +18,8 @@ import (
"syscall"
"testing"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
)
func newMockDirInode(ctx context.Context, cache *DirentCache) *Inode {
@@ -31,7 +31,7 @@ func TestWalkPositive(t *testing.T) {
// refs == -1 -> has been destroyed.
ctx := contexttest.Context(t)
- root := NewDirent(newMockDirInode(ctx, nil), "root")
+ root := NewDirent(ctx, newMockDirInode(ctx, nil), "root")
if got := root.ReadRefs(); got != 1 {
t.Fatalf("root has a ref count of %d, want %d", got, 1)
@@ -73,7 +73,7 @@ func TestWalkNegative(t *testing.T) {
// refs == -1 -> has been destroyed.
ctx := contexttest.Context(t)
- root := NewDirent(NewEmptyDir(ctx, nil), "root")
+ root := NewDirent(ctx, NewEmptyDir(ctx, nil), "root")
mn := root.Inode.InodeOperations.(*mockInodeOperationsLookupNegative)
if got := root.ReadRefs(); got != 1 {
@@ -144,7 +144,7 @@ type mockInodeOperationsLookupNegative struct {
func NewEmptyDir(ctx context.Context, cache *DirentCache) *Inode {
m := NewMockMountSource(cache)
- return NewInode(&mockInodeOperationsLookupNegative{
+ return NewInode(ctx, &mockInodeOperationsLookupNegative{
MockInodeOperations: NewMockInodeOperations(ctx),
}, m, StableAttr{Type: Directory})
}
@@ -162,7 +162,7 @@ func TestHashNegativeToPositive(t *testing.T) {
// refs == -1 -> has been destroyed.
ctx := contexttest.Context(t)
- root := NewDirent(NewEmptyDir(ctx, nil), "root")
+ root := NewDirent(ctx, NewEmptyDir(ctx, nil), "root")
name := "d"
_, err := root.walk(ctx, root, name, false)
@@ -215,7 +215,6 @@ func TestRevalidate(t *testing.T) {
// refs == 0 -> one reference.
// refs == -1 -> has been destroyed.
- ctx := contexttest.Context(t)
for _, test := range []struct {
// desc is the test's description.
desc string
@@ -233,7 +232,8 @@ func TestRevalidate(t *testing.T) {
},
} {
t.Run(test.desc, func(t *testing.T) {
- root := NewDirent(NewMockInodeRevalidate(ctx, test.makeNegative), "root")
+ ctx := contexttest.Context(t)
+ root := NewDirent(ctx, NewMockInodeRevalidate(ctx, test.makeNegative), "root")
name := "d"
d1, err := root.walk(ctx, root, name, false)
@@ -263,7 +263,7 @@ func NewMockInodeRevalidate(ctx context.Context, makeNegative bool) *Inode {
mn := NewMockInodeOperations(ctx)
m := NewMockMountSource(nil)
m.MountSourceOperations.(*MockMountSourceOps).revalidate = true
- return NewInode(&MockInodeOperationsRevalidate{MockInodeOperations: mn, makeNegative: makeNegative}, m, StableAttr{Type: Directory})
+ return NewInode(ctx, &MockInodeOperationsRevalidate{MockInodeOperations: mn, makeNegative: makeNegative}, m, StableAttr{Type: Directory})
}
func (m *MockInodeOperationsRevalidate) Lookup(ctx context.Context, dir *Inode, p string) (*Dirent, error) {
@@ -290,12 +290,12 @@ func TestCreateExtraRefs(t *testing.T) {
}{
{
desc: "Create caching",
- root: NewDirent(NewEmptyDir(ctx, NewDirentCache(1)), "root"),
+ root: NewDirent(ctx, NewEmptyDir(ctx, NewDirentCache(1)), "root"),
refs: 2,
},
{
desc: "Create not caching",
- root: NewDirent(NewEmptyDir(ctx, nil), "root"),
+ root: NewDirent(ctx, NewEmptyDir(ctx, nil), "root"),
refs: 1,
},
} {
@@ -328,11 +328,11 @@ func TestRemoveExtraRefs(t *testing.T) {
}{
{
desc: "Remove caching",
- root: NewDirent(NewEmptyDir(ctx, NewDirentCache(1)), "root"),
+ root: NewDirent(ctx, NewEmptyDir(ctx, NewDirentCache(1)), "root"),
},
{
desc: "Remove not caching",
- root: NewDirent(NewEmptyDir(ctx, nil), "root"),
+ root: NewDirent(ctx, NewEmptyDir(ctx, nil), "root"),
},
} {
t.Run(test.desc, func(t *testing.T) {
@@ -366,7 +366,6 @@ func TestRenameExtraRefs(t *testing.T) {
// refs == 0 -> one reference.
// refs == -1 -> has been destroyed.
- ctx := contexttest.Context(t)
for _, test := range []struct {
// desc is the test's description.
desc string
@@ -384,10 +383,12 @@ func TestRenameExtraRefs(t *testing.T) {
},
} {
t.Run(test.desc, func(t *testing.T) {
+ ctx := contexttest.Context(t)
+
dirAttr := StableAttr{Type: Directory}
- oldParent := NewDirent(NewMockInode(ctx, NewMockMountSource(test.cache), dirAttr), "old_parent")
- newParent := NewDirent(NewMockInode(ctx, NewMockMountSource(test.cache), dirAttr), "new_parent")
+ oldParent := NewDirent(ctx, NewMockInode(ctx, NewMockMountSource(test.cache), dirAttr), "old_parent")
+ newParent := NewDirent(ctx, NewMockInode(ctx, NewMockMountSource(test.cache), dirAttr), "new_parent")
renamed, err := oldParent.Walk(ctx, oldParent, "old_child")
if err != nil {
diff --git a/pkg/sentry/fs/dirent_state.go b/pkg/sentry/fs/dirent_state.go
index 18652b809..f623d6c0e 100644
--- a/pkg/sentry/fs/dirent_state.go
+++ b/pkg/sentry/fs/dirent_state.go
@@ -18,7 +18,7 @@ import (
"fmt"
"sync/atomic"
- "gvisor.googlesource.com/gvisor/pkg/refs"
+ "gvisor.dev/gvisor/pkg/refs"
)
// beforeSave is invoked by stateify.
diff --git a/pkg/sentry/fs/ext/BUILD b/pkg/sentry/fs/ext/BUILD
new file mode 100644
index 000000000..3c2a02eac
--- /dev/null
+++ b/pkg/sentry/fs/ext/BUILD
@@ -0,0 +1,11 @@
+package(licenses = ["notice"])
+
+load("//tools/go_stateify:defs.bzl", "go_library")
+
+go_library(
+ name = "ext",
+ srcs = ["ext.go"],
+ importpath = "gvisor.dev/gvisor/pkg/sentry/fs/ext",
+ visibility = ["//pkg/sentry:internal"],
+ deps = ["//pkg/sentry/fs/ext/disklayout"],
+)
diff --git a/pkg/sentry/fs/ext/assets/README.md b/pkg/sentry/fs/ext/assets/README.md
new file mode 100644
index 000000000..b6eac4f40
--- /dev/null
+++ b/pkg/sentry/fs/ext/assets/README.md
@@ -0,0 +1,28 @@
+### Tiny Ext4 Image
+
+The image is of size 64Kb which supports 64 1k blocks and 16 inodes. This is the
+smallest size mkfs.ext4 works with.
+
+This image was generated using the following commands.
+
+```bash
+fallocate -l 64K tiny.ext4
+mkfs.ext4 -j tiny.ext4
+```
+
+You can mount it using:
+
+```bash
+sudo mount -o loop tiny.ext4 $MOUNTPOINT
+```
+
+`file.txt`, `bigfile.txt` and `symlink.txt` were added to this image by just
+mounting it and copying (while preserving links) those files to the mountpoint
+directory using:
+
+```bash
+sudo cp -P {file.txt,symlink.txt,bigfile.txt} $MOUNTPOINT/
+```
+
+The files in this directory mirror the contents and organisation of the files
+stored in the image.
diff --git a/pkg/sentry/fs/ext/assets/bigfile.txt b/pkg/sentry/fs/ext/assets/bigfile.txt
new file mode 100644
index 000000000..3857cf516
--- /dev/null
+++ b/pkg/sentry/fs/ext/assets/bigfile.txt
@@ -0,0 +1,41 @@
+Lorem ipsum dolor sit amet, consectetur adipiscing elit. Phasellus faucibus eleifend orci, ut ornare nibh faucibus eu. Cras at condimentum massa. Nullam luctus, elit non porttitor congue, sapien diam feugiat sapien, sed eleifend nulla mauris non arcu. Sed lacinia mauris magna, eu mollis libero varius sit amet. Donec mollis, quam convallis commodo posuere, dolor nisi placerat nisi, in faucibus augue mi eu lorem. In pharetra consectetur faucibus. Ut euismod ex efficitur egestas tincidunt. Maecenas condimentum ut ante in rutrum. Vivamus sed arcu tempor, faucibus turpis et, lacinia diam.
+
+Sed in lacus vel nisl interdum bibendum in sed justo. Nunc tellus risus, molestie vitae arcu sed, molestie tempus ligula. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Nunc risus neque, volutpat et ante non, ullamcorper condimentum ante. Aliquam sed metus in urna condimentum convallis. Vivamus ut libero mauris. Proin mollis posuere consequat. Vestibulum placerat mollis est et pulvinar.
+
+Donec rutrum odio ac diam pharetra, id fermentum magna cursus. Pellentesque in dapibus elit, et condimentum orci. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Suspendisse euismod dapibus est, id vestibulum mauris. Nulla facilisi. Nulla cursus gravida nisi. Phasellus vestibulum rutrum lectus, a dignissim mauris hendrerit vitae. In at elementum mauris. Integer vel efficitur velit. Nullam fringilla sapien mi, quis luctus neque efficitur ac. Aenean nec quam dapibus nunc commodo pharetra. Proin sapien mi, fermentum aliquet vulputate non, aliquet porttitor diam. Quisque lacinia, urna et finibus fermentum, nunc lacus vehicula ex, sed congue metus lectus ac quam. Aliquam erat volutpat. Suspendisse sodales, dolor ut tincidunt finibus, augue erat varius tellus, a interdum erat sem at nunc. Vestibulum cursus iaculis sapien, vitae feugiat dui auctor quis.
+
+Pellentesque nec maximus nulla, eu blandit diam. Maecenas quis arcu ornare, congue ante at, vehicula ipsum. Praesent feugiat mauris rutrum sem fermentum, nec luctus ipsum placerat. Pellentesque placerat ipsum at dignissim fringilla. Vivamus et posuere sem, eget hendrerit felis. Aenean vulputate, augue vel mollis feugiat, justo ipsum mollis dolor, eu mollis elit neque ut ipsum. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Fusce bibendum sem quam, vulputate laoreet mi dapibus imperdiet. Sed a purus non nibh pretium aliquet. Integer eget luctus augue, vitae tincidunt magna. Ut eros enim, egestas eu nulla et, lobortis egestas arcu. Cras id ipsum ac justo lacinia rutrum. Vivamus lectus leo, ultricies sed justo at, pellentesque feugiat magna. Ut sollicitudin neque elit, vel ornare mauris commodo id.
+
+Duis dapibus orci et sapien finibus finibus. Mauris eleifend, lacus at vestibulum maximus, quam ligula pharetra erat, sit amet dapibus neque elit vitae neque. In bibendum sollicitudin erat, eget ultricies tortor malesuada at. Sed sit amet orci turpis. Donec feugiat ligula nibh, molestie tincidunt lectus elementum id. Donec volutpat maximus nibh, in vulputate felis posuere eu. Cras tincidunt ullamcorper lacus. Phasellus porta lorem auctor, congue magna a, commodo elit.
+
+Etiam auctor mi quis elit sodales, eu pulvinar arcu condimentum. Aenean imperdiet risus et dapibus tincidunt. Nullam tincidunt dictum dui, sed commodo urna rutrum id. Ut mollis libero vel elit laoreet bibendum. Quisque arcu arcu, tincidunt at ultricies id, vulputate nec metus. In tristique posuere quam sit amet volutpat. Vivamus scelerisque et nunc at dapibus. Fusce finibus libero ut ligula pretium rhoncus. Mauris non elit in arcu finibus imperdiet. Pellentesque nec massa odio. Proin rutrum mauris non sagittis efficitur. Aliquam auctor quam at dignissim faucibus. Ut eget ligula in magna posuere ultricies vitae sit amet turpis. Duis maximus odio nulla. Donec gravida sem tristique tempus scelerisque.
+
+Interdum et malesuada fames ac ante ipsum primis in faucibus. Fusce pharetra magna vulputate aliquet tempus. Duis id hendrerit arcu. Quisque ut ex elit. Integer velit orci, venenatis ut sapien ac, placerat porttitor dui. Interdum et malesuada fames ac ante ipsum primis in faucibus. Nunc hendrerit cursus diam, hendrerit finibus ipsum scelerisque ut. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos.
+
+Nulla non euismod neque. Phasellus vel sapien eu metus pulvinar rhoncus. Suspendisse eu mollis tellus, quis vestibulum tortor. Maecenas interdum dolor sed nulla fermentum maximus. Donec imperdiet ullamcorper condimentum. Nam quis nibh ante. Praesent quis tellus ut tortor pulvinar blandit sit amet ut sapien. Vestibulum est orci, pellentesque vitae tristique sit amet, tristique non felis.
+
+Vivamus sodales pellentesque varius. Sed vel tempus ligula. Nulla tristique nisl vel dui facilisis, ac sodales augue hendrerit. Proin augue nisi, vestibulum quis augue nec, sagittis tincidunt velit. Vestibulum euismod, nulla nec sodales faucibus, urna sapien vulputate magna, id varius metus sapien ut neque. Duis in mollis urna, in scelerisque enim. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Nunc condimentum dictum turpis, et egestas neque dapibus eget. Quisque fringilla, dui eu venenatis eleifend, erat nibh lacinia urna, at lacinia lacus sapien eu dui. Duis eu erat ut mi lacinia convallis a sed ex.
+
+Fusce elit metus, tincidunt nec eleifend a, hendrerit nec ligula. Duis placerat finibus sollicitudin. In euismod porta tellus, in luctus justo bibendum bibendum. Maecenas at magna eleifend lectus tincidunt suscipit ut a ligula. Nulla tempor accumsan felis, fermentum dapibus est eleifend vitae. Mauris urna sem, fringilla at ultricies non, ultrices in arcu. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Nam vehicula nunc at laoreet imperdiet. Nunc tristique ut risus id aliquet. Integer eleifend massa orci.
+
+Vestibulum sed ante sollicitudin nisi fringilla bibendum nec vel quam. Sed pretium augue eu ligula congue pulvinar. Donec vitae magna tincidunt, pharetra lacus id, convallis nulla. Cras viverra nisl nisl, varius convallis leo vulputate nec. Morbi at consequat dui, sed aliquet metus. Sed suscipit fermentum mollis. Maecenas nec mi sodales, tincidunt purus in, tristique mauris. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec interdum mi in velit efficitur, quis ultrices ex imperdiet. Sed vestibulum, magna ut tristique pretium, mi ipsum placerat tellus, non tempor enim augue et ex. Pellentesque eget felis quis ante sodales viverra ac sed lacus. Donec suscipit tempus massa, eget laoreet massa molestie at.
+
+Aenean fringilla dui non aliquet consectetur. Fusce cursus quam nec orci hendrerit faucibus. Donec consequat suscipit enim, non volutpat lectus auctor interdum. Proin lorem purus, maximus vel orci vitae, suscipit egestas turpis. Donec risus urna, congue a sem eu, aliquet placerat odio. Morbi gravida tristique turpis, quis efficitur enim. Nunc interdum gravida ipsum vel facilisis. Nunc congue finibus sollicitudin. Quisque euismod aliquet lectus et tincidunt. Curabitur ultrices sem ut mi fringilla fermentum. Morbi pretium, nisi sit amet dapibus congue, dolor enim consectetur risus, a interdum ligula odio sed odio. Quisque facilisis, mi at suscipit gravida, nunc sapien cursus justo, ut luctus odio nulla quis leo. Integer condimentum lobortis mauris, non egestas tellus lobortis sit amet.
+
+In sollicitudin velit ac ante vehicula, vitae varius tortor mollis. In hac habitasse platea dictumst. Quisque et orci lorem. Integer malesuada fringilla luctus. Pellentesque malesuada, mi non lobortis porttitor, ante ligula vulputate ante, nec dictum risus eros sit amet sapien. Nulla aliquam lorem libero, ac varius nulla tristique eget. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Ut pellentesque mauris orci, vel consequat mi varius a. Ut sit amet elit vulputate, lacinia metus non, fermentum nisl. Pellentesque eu nisi sed quam egestas blandit. Duis sit amet lobortis dolor. Donec consectetur sem interdum, tristique elit sit amet, sodales lacus. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Fusce id aliquam augue. Sed pretium congue risus vitae lacinia. Vestibulum non vulputate risus, ut malesuada justo.
+
+Sed odio elit, consectetur ac mauris quis, consequat commodo libero. Fusce sodales velit vulputate pulvinar fermentum. Donec iaculis nec nisl eget faucibus. Mauris at dictum velit. Donec fermentum lectus eu viverra volutpat. Aliquam consequat facilisis lorem, cursus consequat dui bibendum ullamcorper. Pellentesque nulla magna, imperdiet at magna et, cursus egestas enim. Nullam semper molestie lectus sit amet semper. Duis eget tincidunt est. Integer id neque risus. Integer ultricies hendrerit vestibulum. Donec blandit blandit sagittis. Nunc consectetur vitae nisi consectetur volutpat.
+
+Nulla id lorem fermentum, efficitur magna a, hendrerit dui. Vivamus sagittis orci gravida, bibendum quam eget, molestie est. Phasellus nec enim tincidunt, volutpat sapien non, laoreet diam. Nulla posuere enim nec porttitor lobortis. Donec auctor odio ut orci eleifend, ut eleifend purus convallis. Interdum et malesuada fames ac ante ipsum primis in faucibus. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Ut hendrerit, purus eget viverra tincidunt, sem magna imperdiet libero, et aliquam turpis neque vitae elit. Maecenas semper varius iaculis. Cras non lorem quis quam bibendum eleifend in et libero. Curabitur at purus mauris. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vivamus porta diam sed elit eleifend gravida.
+
+Nulla facilisi. Ut ultricies diam vel diam consectetur, vel porta augue molestie. Fusce interdum sapien et metus facilisis pellentesque. Nulla convallis sem at nunc vehicula facilisis. Nam ac rutrum purus. Nunc bibendum, dolor sit amet tempus ullamcorper, lorem leo tempor sem, id fringilla nunc augue scelerisque augue. Nullam sit amet rutrum nisl. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia Curae; Donec sed mauris gravida eros vehicula sagittis at eget orci. Cras elementum, eros at accumsan bibendum, libero neque blandit purus, vitae vestibulum libero massa ac nibh. Integer at placerat nulla. Mauris eu eleifend orci. Aliquam consequat ligula vitae erat porta lobortis. Duis fermentum elit ac aliquet ornare.
+
+Mauris eget cursus tellus, eget sodales purus. Aliquam malesuada, augue id vulputate finibus, nisi ex bibendum nisl, sit amet laoreet quam urna a dolor. Nullam ultricies, sapien eu laoreet consequat, erat eros dignissim diam, ultrices sodales lectus mauris et leo. Morbi lacinia eu ante at tempus. Sed iaculis finibus magna malesuada efficitur. Donec faucibus erat sit amet elementum feugiat. Praesent a placerat nisi. Etiam lacinia gravida diam, et sollicitudin sapien tincidunt ut.
+
+Maecenas felis quam, tincidunt vitae venenatis scelerisque, viverra vitae odio. Phasellus enim neque, ultricies suscipit malesuada sit amet, vehicula sit amet purus. Nulla placerat sit amet dui vel tincidunt. Nam quis neque vel magna commodo egestas. Vestibulum sagittis rutrum lorem ut congue. Maecenas vel ultrices tellus. Donec efficitur, urna ac consequat iaculis, lorem felis pharetra eros, eget faucibus orci lectus sit amet arcu.
+
+Ut a tempus nisi. Nulla facilisi. Praesent vulputate maximus mi et dapibus. Sed sit amet libero ac augue hendrerit efficitur in a sapien. Mauris placerat velit sit amet tellus sollicitudin faucibus. Donec egestas a magna ac suscipit. Duis enim sapien, mollis sed egestas et, vestibulum vel leo.
+
+Proin quis dapibus dui. Donec eu tincidunt nunc. Vivamus eget purus consectetur, maximus ante vitae, tincidunt elit. Aenean mattis dolor a gravida aliquam. Praesent quis tellus id sem maximus vulputate nec sed nulla. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Curabitur metus nulla, volutpat volutpat est eu, hendrerit congue erat. Aliquam sollicitudin augue ante. Sed sollicitudin, magna eu consequat elementum, mi augue ullamcorper felis, molestie imperdiet erat metus iaculis est. Proin ac tortor nisi. Pellentesque quis nisi risus. Integer enim sapien, tincidunt quis tortor id, accumsan venenatis mi. Nulla facilisi.
+
+Cras pretium sit amet quam congue maximus. Morbi lacus libero, imperdiet commodo massa sed, scelerisque placerat libero. Cras nisl nisi, consectetur sed bibendum eu, venenatis at enim. Proin sodales justo at quam aliquam, a consectetur mi ornare. Donec porta ac est sit amet efficitur. Suspendisse vestibulum tortor id neque imperdiet, id lacinia risus vehicula. Phasellus ac eleifend purus. Mauris vel gravida ante. Aliquam vitae lobortis risus. Sed vehicula consectetur tincidunt. Nam et justo vitae purus molestie consequat. Pellentesque ipsum ex, convallis quis blandit non, gravida et urna. Donec diam ligula amet.
diff --git a/pkg/sentry/fs/ext/assets/file.txt b/pkg/sentry/fs/ext/assets/file.txt
new file mode 100644
index 000000000..980a0d5f1
--- /dev/null
+++ b/pkg/sentry/fs/ext/assets/file.txt
@@ -0,0 +1 @@
+Hello World!
diff --git a/pkg/sentry/fs/ext/assets/symlink.txt b/pkg/sentry/fs/ext/assets/symlink.txt
new file mode 120000
index 000000000..4c330738c
--- /dev/null
+++ b/pkg/sentry/fs/ext/assets/symlink.txt
@@ -0,0 +1 @@
+file.txt \ No newline at end of file
diff --git a/pkg/sentry/fs/ext/assets/tiny.ext4 b/pkg/sentry/fs/ext/assets/tiny.ext4
new file mode 100644
index 000000000..a6859736d
--- /dev/null
+++ b/pkg/sentry/fs/ext/assets/tiny.ext4
Binary files differ
diff --git a/pkg/sentry/fs/ext/disklayout/BUILD b/pkg/sentry/fs/ext/disklayout/BUILD
new file mode 100644
index 000000000..e4cb26645
--- /dev/null
+++ b/pkg/sentry/fs/ext/disklayout/BUILD
@@ -0,0 +1,46 @@
+package(licenses = ["notice"])
+
+load("//tools/go_stateify:defs.bzl", "go_library", "go_test")
+
+go_library(
+ name = "disklayout",
+ srcs = [
+ "block_group.go",
+ "block_group_32.go",
+ "block_group_64.go",
+ "dirent.go",
+ "dirent_new.go",
+ "dirent_old.go",
+ "disklayout.go",
+ "inode.go",
+ "inode_new.go",
+ "inode_old.go",
+ "superblock.go",
+ "superblock_32.go",
+ "superblock_64.go",
+ "superblock_old.go",
+ "test_utils.go",
+ ],
+ importpath = "gvisor.dev/gvisor/pkg/sentry/fs/ext/disklayout",
+ visibility = ["//pkg/sentry:internal"],
+ deps = [
+ "//pkg/abi/linux",
+ "//pkg/binary",
+ "//pkg/sentry/fs",
+ "//pkg/sentry/kernel/auth",
+ "//pkg/sentry/kernel/time",
+ ],
+)
+
+go_test(
+ name = "disklayout_test",
+ size = "small",
+ srcs = [
+ "block_group_test.go",
+ "dirent_test.go",
+ "inode_test.go",
+ "superblock_test.go",
+ ],
+ embed = [":disklayout"],
+ deps = ["//pkg/sentry/kernel/time"],
+)
diff --git a/pkg/sentry/fs/ext/disklayout/block_group.go b/pkg/sentry/fs/ext/disklayout/block_group.go
new file mode 100644
index 000000000..32ea3d97d
--- /dev/null
+++ b/pkg/sentry/fs/ext/disklayout/block_group.go
@@ -0,0 +1,127 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package disklayout
+
+// BlockGroup represents a Linux ext block group descriptor. An ext file system
+// is split into a series of block groups. This provides an access layer to
+// information needed to access and use a block group.
+//
+// See https://www.kernel.org/doc/html/latest/filesystems/ext4/globals.html#block-group-descriptors.
+type BlockGroup interface {
+ // InodeTable returns the absolute block number of the block containing the
+ // inode table. This points to an array of Inode structs. Inode tables are
+ // statically allocated at mkfs time. The superblock records the number of
+ // inodes per group (length of this table) and the size of each inode struct.
+ InodeTable() uint64
+
+ // BlockBitmap returns the absolute block number of the block containing the
+ // block bitmap. This bitmap tracks the usage of data blocks within this block
+ // group and has its own checksum.
+ BlockBitmap() uint64
+
+ // InodeBitmap returns the absolute block number of the block containing the
+ // inode bitmap. This bitmap tracks the usage of this group's inode table
+ // entries and has its own checksum.
+ InodeBitmap() uint64
+
+ // ExclusionBitmap returns the absolute block number of the snapshot exclusion
+ // bitmap.
+ ExclusionBitmap() uint64
+
+ // FreeBlocksCount returns the number of free blocks in the group.
+ FreeBlocksCount() uint32
+
+ // FreeInodesCount returns the number of free inodes in the group.
+ FreeInodesCount() uint32
+
+ // DirectoryCount returns the number of inodes that represent directories
+ // under this block group.
+ DirectoryCount() uint32
+
+ // UnusedInodeCount returns the number of unused inodes beyond the last used
+ // inode in this group's inode table. As a result, we needn’t scan past the
+ // (InodesPerGroup - UnusedInodeCount())th entry in the inode table.
+ UnusedInodeCount() uint32
+
+ // BlockBitmapChecksum returns the block bitmap checksum. This is calculated
+ // using crc32c(FS UUID + group number + entire bitmap).
+ BlockBitmapChecksum() uint32
+
+ // InodeBitmapChecksum returns the inode bitmap checksum. This is calculated
+ // using crc32c(FS UUID + group number + entire bitmap).
+ InodeBitmapChecksum() uint32
+
+ // Checksum returns this block group's checksum.
+ //
+ // If SbMetadataCsum feature is set:
+ // - checksum is crc32c(FS UUID + group number + group descriptor
+ // structure) & 0xFFFF.
+ //
+ // If SbGdtCsum feature is set:
+ // - checksum is crc16(FS UUID + group number + group descriptor
+ // structure).
+ //
+ // SbMetadataCsum and SbGdtCsum should not be both set.
+ // If they are, Linux warns and asks to run fsck.
+ Checksum() uint16
+
+ // Flags returns BGFlags which represents the block group flags.
+ Flags() BGFlags
+}
+
+// These are the different block group flags.
+const (
+ // BgInodeUninit indicates that inode table and bitmap are not initialized.
+ BgInodeUninit uint16 = 0x1
+
+ // BgBlockUninit indicates that block bitmap is not initialized.
+ BgBlockUninit uint16 = 0x2
+
+ // BgInodeZeroed indicates that inode table is zeroed.
+ BgInodeZeroed uint16 = 0x4
+)
+
+// BGFlags represents all the different combinations of block group flags.
+type BGFlags struct {
+ InodeUninit bool
+ BlockUninit bool
+ InodeZeroed bool
+}
+
+// ToInt converts a BGFlags struct back to its 16-bit representation.
+func (f BGFlags) ToInt() uint16 {
+ var res uint16
+
+ if f.InodeUninit {
+ res |= BgInodeUninit
+ }
+ if f.BlockUninit {
+ res |= BgBlockUninit
+ }
+ if f.InodeZeroed {
+ res |= BgInodeZeroed
+ }
+
+ return res
+}
+
+// BGFlagsFromInt converts the 16-bit flag representation to a BGFlags struct.
+func BGFlagsFromInt(flags uint16) BGFlags {
+ return BGFlags{
+ InodeUninit: flags&BgInodeUninit > 0,
+ BlockUninit: flags&BgBlockUninit > 0,
+ InodeZeroed: flags&BgInodeZeroed > 0,
+ }
+}
diff --git a/pkg/sentry/fs/ext/disklayout/block_group_32.go b/pkg/sentry/fs/ext/disklayout/block_group_32.go
new file mode 100644
index 000000000..3e16c76db
--- /dev/null
+++ b/pkg/sentry/fs/ext/disklayout/block_group_32.go
@@ -0,0 +1,72 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package disklayout
+
+// BlockGroup32Bit emulates the first half of struct ext4_group_desc in
+// fs/ext4/ext4.h. It is the block group descriptor struct for ext2, ext3 and
+// 32-bit ext4 filesystems. It implements BlockGroup interface.
+type BlockGroup32Bit struct {
+ BlockBitmapLo uint32
+ InodeBitmapLo uint32
+ InodeTableLo uint32
+ FreeBlocksCountLo uint16
+ FreeInodesCountLo uint16
+ UsedDirsCountLo uint16
+ FlagsRaw uint16
+ ExcludeBitmapLo uint32
+ BlockBitmapChecksumLo uint16
+ InodeBitmapChecksumLo uint16
+ ItableUnusedLo uint16
+ ChecksumRaw uint16
+}
+
+// Compiles only if BlockGroup32Bit implements BlockGroup.
+var _ BlockGroup = (*BlockGroup32Bit)(nil)
+
+// InodeTable implements BlockGroup.InodeTable.
+func (bg *BlockGroup32Bit) InodeTable() uint64 { return uint64(bg.InodeTableLo) }
+
+// BlockBitmap implements BlockGroup.BlockBitmap.
+func (bg *BlockGroup32Bit) BlockBitmap() uint64 { return uint64(bg.BlockBitmapLo) }
+
+// InodeBitmap implements BlockGroup.InodeBitmap.
+func (bg *BlockGroup32Bit) InodeBitmap() uint64 { return uint64(bg.InodeBitmapLo) }
+
+// ExclusionBitmap implements BlockGroup.ExclusionBitmap.
+func (bg *BlockGroup32Bit) ExclusionBitmap() uint64 { return uint64(bg.ExcludeBitmapLo) }
+
+// FreeBlocksCount implements BlockGroup.FreeBlocksCount.
+func (bg *BlockGroup32Bit) FreeBlocksCount() uint32 { return uint32(bg.FreeBlocksCountLo) }
+
+// FreeInodesCount implements BlockGroup.FreeInodesCount.
+func (bg *BlockGroup32Bit) FreeInodesCount() uint32 { return uint32(bg.FreeInodesCountLo) }
+
+// DirectoryCount implements BlockGroup.DirectoryCount.
+func (bg *BlockGroup32Bit) DirectoryCount() uint32 { return uint32(bg.UsedDirsCountLo) }
+
+// UnusedInodeCount implements BlockGroup.UnusedInodeCount.
+func (bg *BlockGroup32Bit) UnusedInodeCount() uint32 { return uint32(bg.ItableUnusedLo) }
+
+// BlockBitmapChecksum implements BlockGroup.BlockBitmapChecksum.
+func (bg *BlockGroup32Bit) BlockBitmapChecksum() uint32 { return uint32(bg.BlockBitmapChecksumLo) }
+
+// InodeBitmapChecksum implements BlockGroup.InodeBitmapChecksum.
+func (bg *BlockGroup32Bit) InodeBitmapChecksum() uint32 { return uint32(bg.InodeBitmapChecksumLo) }
+
+// Checksum implements BlockGroup.Checksum.
+func (bg *BlockGroup32Bit) Checksum() uint16 { return bg.ChecksumRaw }
+
+// Flags implements BlockGroup.Flags.
+func (bg *BlockGroup32Bit) Flags() BGFlags { return BGFlagsFromInt(bg.FlagsRaw) }
diff --git a/pkg/sentry/fs/ext/disklayout/block_group_64.go b/pkg/sentry/fs/ext/disklayout/block_group_64.go
new file mode 100644
index 000000000..9a809197a
--- /dev/null
+++ b/pkg/sentry/fs/ext/disklayout/block_group_64.go
@@ -0,0 +1,93 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package disklayout
+
+// BlockGroup64Bit emulates struct ext4_group_desc in fs/ext4/ext4.h.
+// It is the block group descriptor struct for 64-bit ext4 filesystems.
+// It implements BlockGroup interface. It is an extension of the 32-bit
+// version of BlockGroup.
+type BlockGroup64Bit struct {
+ // We embed the 32-bit struct here because 64-bit version is just an extension
+ // of the 32-bit version.
+ BlockGroup32Bit
+
+ // 64-bit specific fields.
+ BlockBitmapHi uint32
+ InodeBitmapHi uint32
+ InodeTableHi uint32
+ FreeBlocksCountHi uint16
+ FreeInodesCountHi uint16
+ UsedDirsCountHi uint16
+ ItableUnusedHi uint16
+ ExcludeBitmapHi uint32
+ BlockBitmapChecksumHi uint16
+ InodeBitmapChecksumHi uint16
+ _ uint32 // Padding to 64 bytes.
+}
+
+// Compiles only if BlockGroup64Bit implements BlockGroup.
+var _ BlockGroup = (*BlockGroup64Bit)(nil)
+
+// Methods to override. Checksum() and Flags() are not overridden.
+
+// InodeTable implements BlockGroup.InodeTable.
+func (bg *BlockGroup64Bit) InodeTable() uint64 {
+ return (uint64(bg.InodeTableHi) << 32) | uint64(bg.InodeTableLo)
+}
+
+// BlockBitmap implements BlockGroup.BlockBitmap.
+func (bg *BlockGroup64Bit) BlockBitmap() uint64 {
+ return (uint64(bg.BlockBitmapHi) << 32) | uint64(bg.BlockBitmapLo)
+}
+
+// InodeBitmap implements BlockGroup.InodeBitmap.
+func (bg *BlockGroup64Bit) InodeBitmap() uint64 {
+ return (uint64(bg.InodeBitmapHi) << 32) | uint64(bg.InodeBitmapLo)
+}
+
+// ExclusionBitmap implements BlockGroup.ExclusionBitmap.
+func (bg *BlockGroup64Bit) ExclusionBitmap() uint64 {
+ return (uint64(bg.ExcludeBitmapHi) << 32) | uint64(bg.ExcludeBitmapLo)
+}
+
+// FreeBlocksCount implements BlockGroup.FreeBlocksCount.
+func (bg *BlockGroup64Bit) FreeBlocksCount() uint32 {
+ return (uint32(bg.FreeBlocksCountHi) << 16) | uint32(bg.FreeBlocksCountLo)
+}
+
+// FreeInodesCount implements BlockGroup.FreeInodesCount.
+func (bg *BlockGroup64Bit) FreeInodesCount() uint32 {
+ return (uint32(bg.FreeInodesCountHi) << 16) | uint32(bg.FreeInodesCountLo)
+}
+
+// DirectoryCount implements BlockGroup.DirectoryCount.
+func (bg *BlockGroup64Bit) DirectoryCount() uint32 {
+ return (uint32(bg.UsedDirsCountHi) << 16) | uint32(bg.UsedDirsCountLo)
+}
+
+// UnusedInodeCount implements BlockGroup.UnusedInodeCount.
+func (bg *BlockGroup64Bit) UnusedInodeCount() uint32 {
+ return (uint32(bg.ItableUnusedHi) << 16) | uint32(bg.ItableUnusedLo)
+}
+
+// BlockBitmapChecksum implements BlockGroup.BlockBitmapChecksum.
+func (bg *BlockGroup64Bit) BlockBitmapChecksum() uint32 {
+ return (uint32(bg.BlockBitmapChecksumHi) << 16) | uint32(bg.BlockBitmapChecksumLo)
+}
+
+// InodeBitmapChecksum implements BlockGroup.InodeBitmapChecksum.
+func (bg *BlockGroup64Bit) InodeBitmapChecksum() uint32 {
+ return (uint32(bg.InodeBitmapChecksumHi) << 16) | uint32(bg.InodeBitmapChecksumLo)
+}
diff --git a/pkg/abi/linux/ashmem.go b/pkg/sentry/fs/ext/disklayout/block_group_test.go
index 2a722abe0..0ef4294c0 100644
--- a/pkg/abi/linux/ashmem.go
+++ b/pkg/sentry/fs/ext/disklayout/block_group_test.go
@@ -1,4 +1,4 @@
-// Copyright 2018 The gVisor Authors.
+// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -12,18 +12,15 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-package linux
+package disklayout
-// Constants used by ashmem in pin-related ioctls.
-const (
- AshmemNotPurged = 0
- AshmemWasPurged = 1
- AshmemIsUnpinned = 0
- AshmemIsPinned = 1
+import (
+ "testing"
)
-// AshmemPin structure is used for pin-related ioctls.
-type AshmemPin struct {
- Offset uint32
- Len uint32
+// TestBlockGroupSize tests that the block group descriptor structs are of the
+// correct size.
+func TestBlockGroupSize(t *testing.T) {
+ assertSize(t, BlockGroup32Bit{}, 32)
+ assertSize(t, BlockGroup64Bit{}, 64)
}
diff --git a/pkg/sentry/fs/ext/disklayout/dirent.go b/pkg/sentry/fs/ext/disklayout/dirent.go
new file mode 100644
index 000000000..685bf57b8
--- /dev/null
+++ b/pkg/sentry/fs/ext/disklayout/dirent.go
@@ -0,0 +1,69 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package disklayout
+
+import (
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+)
+
+const (
+ // MaxFileName is the maximum length of an ext fs file's name.
+ MaxFileName = 255
+)
+
+var (
+ // inodeTypeByFileType maps ext4 file types to vfs inode types.
+ //
+ // See https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#ftype.
+ inodeTypeByFileType = map[uint8]fs.InodeType{
+ 0: fs.Anonymous,
+ 1: fs.RegularFile,
+ 2: fs.Directory,
+ 3: fs.CharacterDevice,
+ 4: fs.BlockDevice,
+ 5: fs.Pipe,
+ 6: fs.Socket,
+ 7: fs.Symlink,
+ }
+)
+
+// The Dirent interface should be implemented by structs representing ext
+// directory entries. These are for the linear classical directories which
+// just store a list of dirent structs. A directory is a series of data blocks
+// where is each data block contains a linear array of dirents. The last entry
+// of the block has a record size that takes it to the end of the block. The
+// end of the directory is when you read dirInode.Size() bytes from the blocks.
+//
+// See https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#linear-classic-directories.
+type Dirent interface {
+ // Inode returns the absolute inode number of the underlying inode.
+ // Inode number 0 signifies an unused dirent.
+ Inode() uint32
+
+ // RecordSize returns the record length of this dirent on disk. The next
+ // dirent in the dirent list should be read after these many bytes from
+ // the current dirent. Must be a multiple of 4.
+ RecordSize() uint16
+
+ // FileName returns the name of the file. Can be at most 255 is length.
+ FileName() string
+
+ // FileType returns the inode type of the underlying inode. This is a
+ // performance hack so that we do not have to read the underlying inode struct
+ // to know the type of inode. This will only work when the SbDirentFileType
+ // feature is set. If not, the second returned value will be false indicating
+ // that user code has to use the inode mode to extract the file type.
+ FileType() (fs.InodeType, bool)
+}
diff --git a/pkg/sentry/fs/ext/disklayout/dirent_new.go b/pkg/sentry/fs/ext/disklayout/dirent_new.go
new file mode 100644
index 000000000..29ae4a5c2
--- /dev/null
+++ b/pkg/sentry/fs/ext/disklayout/dirent_new.go
@@ -0,0 +1,61 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package disklayout
+
+import (
+ "fmt"
+
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+)
+
+// DirentNew represents the ext4 directory entry struct. This emulates Linux's
+// ext4_dir_entry_2 struct. The FileName can not be more than 255 bytes so we
+// only need 8 bits to store the NameLength. As a result, NameLength has been
+// shortened and the other 8 bits are used to encode the file type. Use the
+// FileTypeRaw field only if the SbDirentFileType feature is set.
+//
+// Note: This struct can be of variable size on disk. The one described below
+// is of maximum size and the FileName beyond NameLength bytes might contain
+// garbage.
+type DirentNew struct {
+ InodeNumber uint32
+ RecordLength uint16
+ NameLength uint8
+ FileTypeRaw uint8
+ FileNameRaw [MaxFileName]byte
+}
+
+// Compiles only if DirentNew implements Dirent.
+var _ Dirent = (*DirentNew)(nil)
+
+// Inode implements Dirent.Inode.
+func (d *DirentNew) Inode() uint32 { return d.InodeNumber }
+
+// RecordSize implements Dirent.RecordSize.
+func (d *DirentNew) RecordSize() uint16 { return d.RecordLength }
+
+// FileName implements Dirent.FileName.
+func (d *DirentNew) FileName() string {
+ return string(d.FileNameRaw[:d.NameLength])
+}
+
+// FileType implements Dirent.FileType.
+func (d *DirentNew) FileType() (fs.InodeType, bool) {
+ if inodeType, ok := inodeTypeByFileType[d.FileTypeRaw]; ok {
+ return inodeType, true
+ }
+
+ panic(fmt.Sprintf("unknown file type %v", d.FileTypeRaw))
+}
diff --git a/pkg/sentry/fs/ext/disklayout/dirent_old.go b/pkg/sentry/fs/ext/disklayout/dirent_old.go
new file mode 100644
index 000000000..2e0f9c812
--- /dev/null
+++ b/pkg/sentry/fs/ext/disklayout/dirent_old.go
@@ -0,0 +1,50 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package disklayout
+
+import "gvisor.dev/gvisor/pkg/sentry/fs"
+
+// DirentOld represents the old directory entry struct which does not contain
+// the file type. This emulates Linux's ext4_dir_entry struct. This is used in
+// ext2, ext3 and sometimes in ext4.
+//
+// Note: This struct can be of variable size on disk. The one described below
+// is of maximum size and the FileName beyond NameLength bytes might contain
+// garbage.
+type DirentOld struct {
+ InodeNumber uint32
+ RecordLength uint16
+ NameLength uint16
+ FileNameRaw [MaxFileName]byte
+}
+
+// Compiles only if DirentOld implements Dirent.
+var _ Dirent = (*DirentOld)(nil)
+
+// Inode implements Dirent.Inode.
+func (d *DirentOld) Inode() uint32 { return d.InodeNumber }
+
+// RecordSize implements Dirent.RecordSize.
+func (d *DirentOld) RecordSize() uint16 { return d.RecordLength }
+
+// FileName implements Dirent.FileName.
+func (d *DirentOld) FileName() string {
+ return string(d.FileNameRaw[:d.NameLength])
+}
+
+// FileType implements Dirent.FileType.
+func (d *DirentOld) FileType() (fs.InodeType, bool) {
+ return fs.Anonymous, false
+}
diff --git a/pkg/abi/linux/binder.go b/pkg/sentry/fs/ext/disklayout/dirent_test.go
index 63b08324a..cc6dff2c9 100644
--- a/pkg/abi/linux/binder.go
+++ b/pkg/sentry/fs/ext/disklayout/dirent_test.go
@@ -1,4 +1,4 @@
-// Copyright 2018 The gVisor Authors.
+// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -12,9 +12,17 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-package linux
+package disklayout
-// BinderVersion structure is used for BINDER_VERSION ioctl.
-type BinderVersion struct {
- ProtocolVersion int32
+import (
+ "testing"
+)
+
+// TestDirentSize tests that the dirent structs are of the correct
+// size.
+func TestDirentSize(t *testing.T) {
+ want := uintptr(263)
+
+ assertSize(t, DirentOld{}, want)
+ assertSize(t, DirentNew{}, want)
}
diff --git a/pkg/sentry/fs/ext/disklayout/disklayout.go b/pkg/sentry/fs/ext/disklayout/disklayout.go
new file mode 100644
index 000000000..bdf4e2132
--- /dev/null
+++ b/pkg/sentry/fs/ext/disklayout/disklayout.go
@@ -0,0 +1,50 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package disklayout provides Linux ext file system's disk level structures
+// which can be directly read into from the underlying device. Structs aim to
+// emulate structures `exactly` how they are layed out on disk.
+//
+// This library aims to be compatible with all ext(2/3/4) systems so it
+// provides a generic interface for all major structures and various
+// implementations (for different versions). The user code is responsible for
+// using appropriate implementations based on the underlying device.
+//
+// Interfacing all major structures here serves a few purposes:
+// - Abstracts away the complexity of the underlying structure from client
+// code. The client only has to figure out versioning on set up and then
+// can use these as black boxes and pass it higher up the stack.
+// - Having pointer receivers forces the user to use pointers to these
+// heavy structs. Hence, prevents the client code from unintentionally
+// copying these by value while passing the interface around.
+// - Version-based implementation selection is resolved on set up hence
+// avoiding per call overhead of choosing implementation.
+// - All interface methods are pretty light weight (do not take in any
+// parameters by design). Passing pointer arguments to interface methods
+// can lead to heap allocation as the compiler won't be able to perform
+// escape analysis on an unknown implementation at compile time.
+//
+// Notes:
+// - All fields in these structs are exported because binary.Read would
+// panic otherwise.
+// - All structures on disk are in little-endian order. Only jbd2 (journal)
+// structures are in big-endian order.
+// - All OS dependent fields in these structures will be interpretted using
+// the Linux version of that field.
+// - The suffix `Lo` in field names stands for lower bits of that field.
+// - The suffix `Hi` in field names stands for upper bits of that field.
+// - The suffix `Raw` has been added to indicate that the field is not split
+// into Lo and Hi fields and also to resolve name collision with the
+// respective interface.
+package disklayout
diff --git a/pkg/sentry/fs/ext/disklayout/inode.go b/pkg/sentry/fs/ext/disklayout/inode.go
new file mode 100644
index 000000000..b48001910
--- /dev/null
+++ b/pkg/sentry/fs/ext/disklayout/inode.go
@@ -0,0 +1,267 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package disklayout
+
+import (
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+)
+
+// The Inode interface must be implemented by structs representing ext inodes.
+// The inode stores all the metadata pertaining to the file (except for the
+// file name which is held by the directory entry). It does NOT expose all
+// fields and should be extended if need be.
+//
+// Some file systems (e.g. FAT) use the directory entry to store all this
+// information. Ext file systems do not so that they can support hard links.
+// However, ext4 cheats a little bit and duplicates the file type in the
+// directory entry for performance gains.
+//
+// See https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#index-nodes.
+type Inode interface {
+ // Mode returns the linux file mode which is majorly used to extract
+ // information like:
+ // - File permissions (read/write/execute by user/group/others).
+ // - Sticky, set UID and GID bits.
+ // - File type.
+ //
+ // Masks to extract this information are provided in pkg/abi/linux/file.go.
+ Mode() linux.FileMode
+
+ // UID returns the owner UID.
+ UID() auth.KUID
+
+ // GID returns the owner GID.
+ GID() auth.KGID
+
+ // Size returns the size of the file in bytes.
+ Size() uint64
+
+ // InodeSize returns the size of this inode struct in bytes.
+ // In ext2 and ext3, the inode struct and inode disk record size was fixed at
+ // 128 bytes. Ext4 makes it possible for the inode struct to be bigger.
+ // However, accessing any field beyond the 128 bytes marker must be verified
+ // using this method.
+ InodeSize() uint16
+
+ // AccessTime returns the last access time. Shows when the file was last read.
+ //
+ // If InExtendedAttr is set, then this should NOT be used because the
+ // underlying field is used to store the extended attribute value checksum.
+ AccessTime() time.Time
+
+ // ChangeTime returns the last change time. Shows when the file meta data
+ // (like permissions) was last changed.
+ //
+ // If InExtendedAttr is set, then this should NOT be used because the
+ // underlying field is used to store the lower 32 bits of the attribute
+ // value’s reference count.
+ ChangeTime() time.Time
+
+ // ModificationTime returns the last modification time. Shows when the file
+ // content was last modified.
+ //
+ // If InExtendedAttr is set, then this should NOT be used because
+ // the underlying field contains the number of the inode that owns the
+ // extended attribute.
+ ModificationTime() time.Time
+
+ // DeletionTime returns the deletion time. Inodes are marked as deleted by
+ // writing to the underlying field. FS tools can restore files until they are
+ // actually overwritten.
+ DeletionTime() time.Time
+
+ // LinksCount returns the number of hard links to this inode.
+ //
+ // Normally there is an upper limit on the number of hard links:
+ // - ext2/ext3 = 32,000
+ // - ext4 = 65,000
+ //
+ // This implies that an ext4 directory cannot have more than 64,998
+ // subdirectories because each subdirectory will have a hard link to the
+ // directory via the `..` entry. The directory has hard link via the `.` entry
+ // of its own. And finally the inode is initiated with 1 hard link (itself).
+ //
+ // The underlying value is reset to 1 if all the following hold:
+ // - Inode is a directory.
+ // - SbDirNlink is enabled.
+ // - Number of hard links is incremented past 64,999.
+ // Hard link value of 1 for a directory would indicate that the number of hard
+ // links is unknown because a directory can have minimum 2 hard links (itself
+ // and `.` entry).
+ LinksCount() uint16
+
+ // Flags returns InodeFlags which represents the inode flags.
+ Flags() InodeFlags
+
+ // Blocks returns the underlying inode.i_block array. This field is special
+ // and is used to store various kinds of things depending on the filesystem
+ // version and inode type.
+ // - In ext2/ext3, it contains the block map.
+ // - In ext4, it contains the extent tree.
+ // - For inline files, it contains the file contents.
+ // - For symlinks, it contains the link path (if it fits here).
+ //
+ // See https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#the-contents-of-inode-i-block.
+ Blocks() [60]byte
+}
+
+// Inode flags. This is not comprehensive and flags which were not used in
+// the Linux kernel have been excluded.
+const (
+ // InSync indicates that all writes to the file must be synchronous.
+ InSync = 0x8
+
+ // InImmutable indicates that this file is immutable.
+ InImmutable = 0x10
+
+ // InAppend indicates that this file can only be appended to.
+ InAppend = 0x20
+
+ // InNoDump indicates that teh dump(1) utility should not dump this file.
+ InNoDump = 0x40
+
+ // InNoAccessTime indicates that the access time of this inode must not be
+ // updated.
+ InNoAccessTime = 0x80
+
+ // InIndex indicates that this directory has hashed indexes.
+ InIndex = 0x1000
+
+ // InJournalData indicates that file data must always be written through a
+ // journal device.
+ InJournalData = 0x4000
+
+ // InDirSync indicates that all the directory entiry data must be written
+ // synchronously.
+ InDirSync = 0x10000
+
+ // InTopDir indicates that this inode is at the top of the directory hierarchy.
+ InTopDir = 0x20000
+
+ // InHugeFile indicates that this is a huge file.
+ InHugeFile = 0x40000
+
+ // InExtents indicates that this inode uses extents.
+ InExtents = 0x80000
+
+ // InExtendedAttr indicates that this inode stores a large extended attribute
+ // value in its data blocks.
+ InExtendedAttr = 0x200000
+
+ // InInline indicates that this inode has inline data.
+ InInline = 0x10000000
+
+ // InReserved indicates that this inode is reserved for the ext4 library.
+ InReserved = 0x80000000
+)
+
+// InodeFlags represents all possible combinations of inode flags. It aims to
+// cover the bit masks and provide a more user-friendly interface.
+type InodeFlags struct {
+ Sync bool
+ Immutable bool
+ Append bool
+ NoDump bool
+ NoAccessTime bool
+ Index bool
+ JournalData bool
+ DirSync bool
+ TopDir bool
+ HugeFile bool
+ Extents bool
+ ExtendedAttr bool
+ Inline bool
+ Reserved bool
+}
+
+// ToInt converts inode flags back to its 32-bit rep.
+func (f InodeFlags) ToInt() uint32 {
+ var res uint32
+
+ if f.Sync {
+ res |= InSync
+ }
+ if f.Immutable {
+ res |= InImmutable
+ }
+ if f.Append {
+ res |= InAppend
+ }
+ if f.NoDump {
+ res |= InNoDump
+ }
+ if f.NoAccessTime {
+ res |= InNoAccessTime
+ }
+ if f.Index {
+ res |= InIndex
+ }
+ if f.JournalData {
+ res |= InJournalData
+ }
+ if f.DirSync {
+ res |= InDirSync
+ }
+ if f.TopDir {
+ res |= InTopDir
+ }
+ if f.HugeFile {
+ res |= InHugeFile
+ }
+ if f.Extents {
+ res |= InExtents
+ }
+ if f.ExtendedAttr {
+ res |= InExtendedAttr
+ }
+ if f.Inline {
+ res |= InInline
+ }
+ if f.Reserved {
+ res |= InReserved
+ }
+
+ return res
+}
+
+// InodeFlagsFromInt converts the integer representation of inode flags to
+// a InodeFlags struct.
+func InodeFlagsFromInt(f uint32) InodeFlags {
+ return InodeFlags{
+ Sync: f&InSync > 0,
+ Immutable: f&InImmutable > 0,
+ Append: f&InAppend > 0,
+ NoDump: f&InNoDump > 0,
+ NoAccessTime: f&InNoAccessTime > 0,
+ Index: f&InIndex > 0,
+ JournalData: f&InJournalData > 0,
+ DirSync: f&InDirSync > 0,
+ TopDir: f&InTopDir > 0,
+ HugeFile: f&InHugeFile > 0,
+ Extents: f&InExtents > 0,
+ ExtendedAttr: f&InExtendedAttr > 0,
+ Inline: f&InInline > 0,
+ Reserved: f&InReserved > 0,
+ }
+}
+
+// These masks define how users can view/modify inode flags. The rest of the
+// flags are for internal kernel usage only.
+const (
+ InUserReadFlagMask = 0x4BDFFF
+ InUserWriteFlagMask = 0x4B80FF
+)
diff --git a/pkg/sentry/fs/ext/disklayout/inode_new.go b/pkg/sentry/fs/ext/disklayout/inode_new.go
new file mode 100644
index 000000000..4f5348372
--- /dev/null
+++ b/pkg/sentry/fs/ext/disklayout/inode_new.go
@@ -0,0 +1,96 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package disklayout
+
+import "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+
+// InodeNew represents ext4 inode structure which can be bigger than
+// OldInodeSize. The actual size of this struct should be determined using
+// inode.ExtraInodeSize. Accessing any field here should be verified with the
+// actual size. The extra space between the end of the inode struct and end of
+// the inode record can be used to store extended attr.
+//
+// If the TimeExtra fields are in scope, the lower 2 bits of those are used
+// to extend their counter part to be 34 bits wide; the rest (upper) 30 bits
+// are used to provide nanoscond precision. Hence, these timestamps will now
+// overflow in May 2446.
+// See https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#inode-timestamps.
+type InodeNew struct {
+ InodeOld
+
+ ExtraInodeSize uint16
+ ChecksumHi uint16
+ ChangeTimeExtra uint32
+ ModificationTimeExtra uint32
+ AccessTimeExtra uint32
+ CreationTime uint32
+ CreationTimeExtra uint32
+ VersionHi uint32
+ ProjectID uint32
+}
+
+// Compiles only if InodeNew implements Inode.
+var _ Inode = (*InodeNew)(nil)
+
+// fromExtraTime decodes the extra time and constructs the kernel time struct
+// with nanosecond precision.
+func fromExtraTime(lo int32, extra uint32) time.Time {
+ // See description above InodeNew for format.
+ seconds := (int64(extra&0x3) << 32) + int64(lo)
+ nanoseconds := int64(extra >> 2)
+ return time.FromUnix(seconds, nanoseconds)
+}
+
+// Only override methods which change due to ext4 specific fields.
+
+// Size implements Inode.Size.
+func (in *InodeNew) Size() uint64 {
+ return (uint64(in.SizeHi) << 32) | uint64(in.SizeLo)
+}
+
+// InodeSize implements Inode.InodeSize.
+func (in *InodeNew) InodeSize() uint16 {
+ return oldInodeSize + in.ExtraInodeSize
+}
+
+// ChangeTime implements Inode.ChangeTime.
+func (in *InodeNew) ChangeTime() time.Time {
+ // Apply new timestamp logic if inode.ChangeTimeExtra is in scope.
+ if in.ExtraInodeSize >= 8 {
+ return fromExtraTime(in.ChangeTimeRaw, in.ChangeTimeExtra)
+ }
+
+ return in.InodeOld.ChangeTime()
+}
+
+// ModificationTime implements Inode.ModificationTime.
+func (in *InodeNew) ModificationTime() time.Time {
+ // Apply new timestamp logic if inode.ModificationTimeExtra is in scope.
+ if in.ExtraInodeSize >= 12 {
+ return fromExtraTime(in.ModificationTimeRaw, in.ModificationTimeExtra)
+ }
+
+ return in.InodeOld.ModificationTime()
+}
+
+// AccessTime implements Inode.AccessTime.
+func (in *InodeNew) AccessTime() time.Time {
+ // Apply new timestamp logic if inode.AccessTimeExtra is in scope.
+ if in.ExtraInodeSize >= 16 {
+ return fromExtraTime(in.AccessTimeRaw, in.AccessTimeExtra)
+ }
+
+ return in.InodeOld.AccessTime()
+}
diff --git a/pkg/sentry/fs/ext/disklayout/inode_old.go b/pkg/sentry/fs/ext/disklayout/inode_old.go
new file mode 100644
index 000000000..dc4c9d8e4
--- /dev/null
+++ b/pkg/sentry/fs/ext/disklayout/inode_old.go
@@ -0,0 +1,117 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package disklayout
+
+import (
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+)
+
+const (
+ // oldInodeSize is the inode size in ext2/ext3.
+ oldInodeSize = 128
+)
+
+// InodeOld implements Inode interface. It emulates ext2/ext3 inode struct.
+// Inode struct size and record size are both 128 bytes for this.
+//
+// All fields representing time are in seconds since the epoch. Which means that
+// they will overflow in January 2038.
+type InodeOld struct {
+ ModeRaw uint16
+ UIDLo uint16
+ SizeLo uint32
+
+ // The time fields are signed integers because they could be negative to
+ // represent time before the epoch.
+ AccessTimeRaw int32
+ ChangeTimeRaw int32
+ ModificationTimeRaw int32
+ DeletionTimeRaw int32
+
+ GIDLo uint16
+ LinksCountRaw uint16
+ BlocksCountLo uint32
+ FlagsRaw uint32
+ VersionLo uint32 // This is OS dependent.
+ BlocksRaw [60]byte
+ Generation uint32
+ FileACLLo uint32
+ SizeHi uint32
+ ObsoFaddr uint32
+
+ // OS dependent fields have been inlined here.
+ BlocksCountHi uint16
+ FileACLHi uint16
+ UIDHi uint16
+ GIDHi uint16
+ ChecksumLo uint16
+ _ uint16
+}
+
+// Compiles only if InodeOld implements Inode.
+var _ Inode = (*InodeOld)(nil)
+
+// Mode implements Inode.Mode.
+func (in *InodeOld) Mode() linux.FileMode { return linux.FileMode(in.ModeRaw) }
+
+// UID implements Inode.UID.
+func (in *InodeOld) UID() auth.KUID {
+ return auth.KUID((uint32(in.UIDHi) << 16) | uint32(in.UIDLo))
+}
+
+// GID implements Inode.GID.
+func (in *InodeOld) GID() auth.KGID {
+ return auth.KGID((uint32(in.GIDHi) << 16) | uint32(in.GIDLo))
+}
+
+// Size implements Inode.Size.
+func (in *InodeOld) Size() uint64 {
+ // In ext2/ext3, in.SizeHi did not exist, it was instead named in.DirACL.
+ return uint64(in.SizeLo)
+}
+
+// InodeSize implements Inode.InodeSize.
+func (in *InodeOld) InodeSize() uint16 { return oldInodeSize }
+
+// AccessTime implements Inode.AccessTime.
+func (in *InodeOld) AccessTime() time.Time {
+ return time.FromUnix(int64(in.AccessTimeRaw), 0)
+}
+
+// ChangeTime implements Inode.ChangeTime.
+func (in *InodeOld) ChangeTime() time.Time {
+ return time.FromUnix(int64(in.ChangeTimeRaw), 0)
+}
+
+// ModificationTime implements Inode.ModificationTime.
+func (in *InodeOld) ModificationTime() time.Time {
+ return time.FromUnix(int64(in.ModificationTimeRaw), 0)
+}
+
+// DeletionTime implements Inode.DeletionTime.
+func (in *InodeOld) DeletionTime() time.Time {
+ return time.FromUnix(int64(in.DeletionTimeRaw), 0)
+}
+
+// LinksCount implements Inode.LinksCount.
+func (in *InodeOld) LinksCount() uint16 { return in.LinksCountRaw }
+
+// Flags implements Inode.Flags.
+func (in *InodeOld) Flags() InodeFlags { return InodeFlagsFromInt(in.FlagsRaw) }
+
+// Blocks implements Inode.Blocks.
+func (in *InodeOld) Blocks() [60]byte { return in.BlocksRaw }
diff --git a/pkg/sentry/fs/ext/disklayout/inode_test.go b/pkg/sentry/fs/ext/disklayout/inode_test.go
new file mode 100644
index 000000000..9cae9e4f0
--- /dev/null
+++ b/pkg/sentry/fs/ext/disklayout/inode_test.go
@@ -0,0 +1,222 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package disklayout
+
+import (
+ "fmt"
+ "strconv"
+ "testing"
+
+ "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+)
+
+// TestInodeSize tests that the inode structs are of the correct size.
+func TestInodeSize(t *testing.T) {
+ assertSize(t, InodeOld{}, oldInodeSize)
+
+ // This was updated from 156 bytes to 160 bytes in Oct 2015.
+ assertSize(t, InodeNew{}, 160)
+}
+
+// TestTimestampSeconds tests that the seconds part of [a/c/m] timestamps in
+// ext4 inode structs are decoded correctly.
+//
+// These tests are derived from the table under https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#inode-timestamps.
+func TestTimestampSeconds(t *testing.T) {
+ type timestampTest struct {
+ // msbSet tells if the most significant bit of InodeOld.[X]TimeRaw is set.
+ // If this is set then the 32-bit time is negative.
+ msbSet bool
+
+ // lowerBound tells if we should take the lowest possible value of
+ // InodeOld.[X]TimeRaw while satisfying test.msbSet condition. If set to
+ // false it tells to take the highest possible value.
+ lowerBound bool
+
+ // extraBits is InodeNew.[X]TimeExtra.
+ extraBits uint32
+
+ // want is the kernel time struct that is expected.
+ want time.Time
+ }
+
+ tests := []timestampTest{
+ // 1901-12-13
+ {
+ msbSet: true,
+ lowerBound: true,
+ extraBits: 0,
+ want: time.FromUnix(int64(-0x80000000), 0),
+ },
+
+ // 1969-12-31
+ {
+ msbSet: true,
+ lowerBound: false,
+ extraBits: 0,
+ want: time.FromUnix(int64(-1), 0),
+ },
+
+ // 1970-01-01
+ {
+ msbSet: false,
+ lowerBound: true,
+ extraBits: 0,
+ want: time.FromUnix(int64(0), 0),
+ },
+
+ // 2038-01-19
+ {
+ msbSet: false,
+ lowerBound: false,
+ extraBits: 0,
+ want: time.FromUnix(int64(0x7fffffff), 0),
+ },
+
+ // 2038-01-19
+ {
+ msbSet: true,
+ lowerBound: true,
+ extraBits: 1,
+ want: time.FromUnix(int64(0x80000000), 0),
+ },
+
+ // 2106-02-07
+ {
+ msbSet: true,
+ lowerBound: false,
+ extraBits: 1,
+ want: time.FromUnix(int64(0xffffffff), 0),
+ },
+
+ // 2106-02-07
+ {
+ msbSet: false,
+ lowerBound: true,
+ extraBits: 1,
+ want: time.FromUnix(int64(0x100000000), 0),
+ },
+
+ // 2174-02-25
+ {
+ msbSet: false,
+ lowerBound: false,
+ extraBits: 1,
+ want: time.FromUnix(int64(0x17fffffff), 0),
+ },
+
+ // 2174-02-25
+ {
+ msbSet: true,
+ lowerBound: true,
+ extraBits: 2,
+ want: time.FromUnix(int64(0x180000000), 0),
+ },
+
+ // 2242-03-16
+ {
+ msbSet: true,
+ lowerBound: false,
+ extraBits: 2,
+ want: time.FromUnix(int64(0x1ffffffff), 0),
+ },
+
+ // 2242-03-16
+ {
+ msbSet: false,
+ lowerBound: true,
+ extraBits: 2,
+ want: time.FromUnix(int64(0x200000000), 0),
+ },
+
+ // 2310-04-04
+ {
+ msbSet: false,
+ lowerBound: false,
+ extraBits: 2,
+ want: time.FromUnix(int64(0x27fffffff), 0),
+ },
+
+ // 2310-04-04
+ {
+ msbSet: true,
+ lowerBound: true,
+ extraBits: 3,
+ want: time.FromUnix(int64(0x280000000), 0),
+ },
+
+ // 2378-04-22
+ {
+ msbSet: true,
+ lowerBound: false,
+ extraBits: 3,
+ want: time.FromUnix(int64(0x2ffffffff), 0),
+ },
+
+ // 2378-04-22
+ {
+ msbSet: false,
+ lowerBound: true,
+ extraBits: 3,
+ want: time.FromUnix(int64(0x300000000), 0),
+ },
+
+ // 2446-05-10
+ {
+ msbSet: false,
+ lowerBound: false,
+ extraBits: 3,
+ want: time.FromUnix(int64(0x37fffffff), 0),
+ },
+ }
+
+ lowerMSB0 := int32(0) // binary: 00000000 00000000 00000000 00000000
+ upperMSB0 := int32(0x7fffffff) // binary: 01111111 11111111 11111111 11111111
+ lowerMSB1 := int32(-0x80000000) // binary: 10000000 00000000 00000000 00000000
+ upperMSB1 := int32(-1) // binary: 11111111 11111111 11111111 11111111
+
+ get32BitTime := func(test timestampTest) int32 {
+ if test.msbSet {
+ if test.lowerBound {
+ return lowerMSB1
+ }
+
+ return upperMSB1
+ }
+
+ if test.lowerBound {
+ return lowerMSB0
+ }
+
+ return upperMSB0
+ }
+
+ getTestName := func(test timestampTest) string {
+ return fmt.Sprintf(
+ "Tests time decoding with epoch bits 0b%s and 32-bit raw time: MSB set=%t, lower bound=%t",
+ strconv.FormatInt(int64(test.extraBits), 2),
+ test.msbSet,
+ test.lowerBound,
+ )
+ }
+
+ for _, test := range tests {
+ t.Run(getTestName(test), func(t *testing.T) {
+ if got := fromExtraTime(get32BitTime(test), test.extraBits); got != test.want {
+ t.Errorf("Expected: %v, Got: %v", test.want, got)
+ }
+ })
+ }
+}
diff --git a/pkg/sentry/fs/ext/disklayout/superblock.go b/pkg/sentry/fs/ext/disklayout/superblock.go
new file mode 100644
index 000000000..e4b8f46fb
--- /dev/null
+++ b/pkg/sentry/fs/ext/disklayout/superblock.go
@@ -0,0 +1,468 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package disklayout
+
+// SuperBlock should be implemented by structs representing the ext superblock.
+// The superblock holds a lot of information about the enclosing filesystem.
+// This interface aims to provide access methods to important information held
+// by the superblock. It does NOT expose all fields of the superblock, only the
+// ones necessary. This can be expanded when need be.
+//
+// Location and replication:
+// - The superblock is located at offset 1024 in block group 0.
+// - Redundant copies of the superblock and group descriptors are kept in
+// all groups if SbSparse feature flag is NOT set. If it is set, the
+// replicas only exist in groups whose group number is either 0 or a
+// power of 3, 5, or 7.
+// - There is also a sparse superblock feature v2 in which there are just
+// two replicas saved in the block groups pointed by sb.s_backup_bgs.
+//
+// Replicas should eventually be updated if the superblock is updated.
+//
+// See https://www.kernel.org/doc/html/latest/filesystems/ext4/globals.html#super-block.
+type SuperBlock interface {
+ // InodesCount returns the total number of inodes in this filesystem.
+ InodesCount() uint32
+
+ // BlocksCount returns the total number of data blocks in this filesystem.
+ BlocksCount() uint64
+
+ // FreeBlocksCount returns the number of free blocks in this filesystem.
+ FreeBlocksCount() uint64
+
+ // FreeInodesCount returns the number of free inodes in this filesystem.
+ FreeInodesCount() uint32
+
+ // MountCount returns the number of mounts since the last fsck.
+ MountCount() uint16
+
+ // MaxMountCount returns the number of mounts allowed beyond which a fsck is
+ // needed.
+ MaxMountCount() uint16
+
+ // FirstDataBlock returns the absolute block number of the first data block,
+ // which contains the super block itself.
+ //
+ // If the filesystem has 1kb data blocks then this should return 1. For all
+ // other configurations, this typically returns 0.
+ //
+ // The first block group descriptor is in (FirstDataBlock() + 1)th block.
+ FirstDataBlock() uint32
+
+ // BlockSize returns the size of one data block in this filesystem.
+ // This can be calculated by 2^(10 + sb.s_log_block_size). This ensures that
+ // the smallest block size is 1kb.
+ BlockSize() uint64
+
+ // BlocksPerGroup returns the number of data blocks in a block group.
+ BlocksPerGroup() uint32
+
+ // ClusterSize returns block cluster size (set during mkfs time by admin).
+ // This can be calculated by 2^(10 + sb.s_log_cluster_size). This ensures that
+ // the smallest cluster size is 1kb.
+ //
+ // sb.s_log_cluster_size must equal sb.s_log_block_size if bigalloc feature
+ // is NOT set and consequently BlockSize() = ClusterSize() in that case.
+ ClusterSize() uint64
+
+ // ClustersPerGroup returns:
+ // - number of clusters per group if bigalloc is enabled.
+ // - BlocksPerGroup() otherwise.
+ ClustersPerGroup() uint32
+
+ // InodeSize returns the size of the inode disk record size in bytes. Use this
+ // to iterate over inode arrays on disk.
+ //
+ // In ext2 and ext3:
+ // - Each inode had a disk record of 128 bytes.
+ // - The inode struct size was fixed at 128 bytes.
+ //
+ // In ext4 its possible to allocate larger on-disk inodes:
+ // - Inode disk record size = sb.s_inode_size (function return value).
+ // = 256 (default)
+ // - Inode struct size = 128 + inode.i_extra_isize.
+ // = 128 + 32 = 160 (default)
+ InodeSize() uint16
+
+ // InodesPerGroup returns the number of inodes in a block group.
+ InodesPerGroup() uint32
+
+ // BgDescSize returns the size of the block group descriptor struct.
+ //
+ // In ext2, ext3, ext4 (without 64-bit feature), the block group descriptor
+ // is only 32 bytes long.
+ // In ext4 with 64-bit feature, the block group descriptor expands to AT LEAST
+ // 64 bytes. It might be bigger than that.
+ BgDescSize() uint16
+
+ // CompatibleFeatures returns the CompatFeatures struct which holds all the
+ // compatible features this fs supports.
+ CompatibleFeatures() CompatFeatures
+
+ // IncompatibleFeatures returns the CompatFeatures struct which holds all the
+ // incompatible features this fs supports.
+ IncompatibleFeatures() IncompatFeatures
+
+ // ReadOnlyCompatibleFeatures returns the CompatFeatures struct which holds all the
+ // readonly compatible features this fs supports.
+ ReadOnlyCompatibleFeatures() RoCompatFeatures
+
+ // Magic() returns the magic signature which must be 0xef53.
+ Magic() uint16
+
+ // Revision returns the superblock revision. Superblock struct fields from
+ // offset 0x54 till 0x150 should only be used if superblock has DynamicRev.
+ Revision() SbRevision
+}
+
+// SbRevision is the type for superblock revisions.
+type SbRevision int
+
+// Super block revisions.
+const (
+ // OldRev is the good old (original) format.
+ OldRev SbRevision = 0
+
+ // DynamicRev is v2 format w/ dynamic inode sizes.
+ DynamicRev SbRevision = 1
+)
+
+// Superblock compatible features.
+// This is not exhaustive, unused features are not listed.
+const (
+ // SbDirPrealloc indicates directory preallocation.
+ SbDirPrealloc = 0x1
+
+ // SbHasJournal indicates the presence of a journal. jbd2 should only work
+ // with this being set.
+ SbHasJournal = 0x4
+
+ // SbExtAttr indicates extended attributes support.
+ SbExtAttr = 0x8
+
+ // SbResizeInode indicates that the fs has reserved GDT blocks (right after
+ // group descriptors) for fs expansion.
+ SbResizeInode = 0x10
+
+ // SbDirIndex indicates that the fs has directory indices.
+ SbDirIndex = 0x20
+
+ // SbSparseV2 stands for Sparse superblock version 2.
+ SbSparseV2 = 0x200
+)
+
+// CompatFeatures represents a superblock's compatible feature set. If the
+// kernel does not understand any of these feature, it can still read/write
+// to this fs.
+type CompatFeatures struct {
+ DirPrealloc bool
+ HasJournal bool
+ ExtAttr bool
+ ResizeInode bool
+ DirIndex bool
+ SparseV2 bool
+}
+
+// ToInt converts superblock compatible features back to its 32-bit rep.
+func (f CompatFeatures) ToInt() uint32 {
+ var res uint32
+
+ if f.DirPrealloc {
+ res |= SbDirPrealloc
+ }
+ if f.HasJournal {
+ res |= SbHasJournal
+ }
+ if f.ExtAttr {
+ res |= SbExtAttr
+ }
+ if f.ResizeInode {
+ res |= SbResizeInode
+ }
+ if f.DirIndex {
+ res |= SbDirIndex
+ }
+ if f.SparseV2 {
+ res |= SbSparseV2
+ }
+
+ return res
+}
+
+// CompatFeaturesFromInt converts the integer representation of superblock
+// compatible features to CompatFeatures struct.
+func CompatFeaturesFromInt(f uint32) CompatFeatures {
+ return CompatFeatures{
+ DirPrealloc: f&SbDirPrealloc > 0,
+ HasJournal: f&SbHasJournal > 0,
+ ExtAttr: f&SbExtAttr > 0,
+ ResizeInode: f&SbResizeInode > 0,
+ DirIndex: f&SbDirIndex > 0,
+ SparseV2: f&SbSparseV2 > 0,
+ }
+}
+
+// Superblock incompatible features.
+// This is not exhaustive, unused features are not listed.
+const (
+ // SbDirentFileType indicates that directory entries record the file type.
+ // We should use struct ext4_dir_entry_2 for dirents then.
+ SbDirentFileType = 0x2
+
+ // SbRecovery indicates that the filesystem needs recovery.
+ SbRecovery = 0x4
+
+ // SbJournalDev indicates that the filesystem has a separate journal device.
+ SbJournalDev = 0x8
+
+ // SbMetaBG indicates that the filesystem is using Meta block groups. Moves
+ // the group descriptors from the congested first block group into the first
+ // group of each metablock group to increase the maximum block groups limit
+ // and hence support much larger filesystems.
+ //
+ // See https://www.kernel.org/doc/html/latest/filesystems/ext4/overview.html#meta-block-groups.
+ SbMetaBG = 0x10
+
+ // SbExtents indicates that the filesystem uses extents. Must be set in ext4
+ // filesystems.
+ SbExtents = 0x40
+
+ // SbIs64Bit indicates that this filesystem addresses blocks with 64-bits.
+ // Hence can support 2^64 data blocks.
+ SbIs64Bit = 0x80
+
+ // SbMMP indicates that this filesystem has multiple mount protection.
+ //
+ // See https://www.kernel.org/doc/html/latest/filesystems/ext4/globals.html#multiple-mount-protection.
+ SbMMP = 0x100
+
+ // SbFlexBg indicates that this filesystem has flexible block groups. Several
+ // block groups are tied into one logical block group so that all the metadata
+ // for the block groups (bitmaps and inode tables) are close together for
+ // faster loading. Consequently, large files will be continuous on disk.
+ // However, this does not affect the placement of redundant superblocks and
+ // group descriptors.
+ //
+ // See https://www.kernel.org/doc/html/latest/filesystems/ext4/overview.html#flexible-block-groups.
+ SbFlexBg = 0x200
+
+ // SbLargeDir shows that large directory enabled. Directory htree can be 3
+ // levels deep. Directory htrees are allowed to be 2 levels deep otherwise.
+ SbLargeDir = 0x4000
+
+ // SbInlineData allows inline data in inodes for really small files.
+ SbInlineData = 0x8000
+
+ // SbEncrypted indicates that this fs contains encrypted inodes.
+ SbEncrypted = 0x10000
+)
+
+// IncompatFeatures represents a superblock's incompatible feature set. If the
+// kernel does not understand any of these feature, it should refuse to mount.
+type IncompatFeatures struct {
+ DirentFileType bool
+ Recovery bool
+ JournalDev bool
+ MetaBG bool
+ Extents bool
+ Is64Bit bool
+ MMP bool
+ FlexBg bool
+ LargeDir bool
+ InlineData bool
+ Encrypted bool
+}
+
+// ToInt converts superblock incompatible features back to its 32-bit rep.
+func (f IncompatFeatures) ToInt() uint32 {
+ var res uint32
+
+ if f.DirentFileType {
+ res |= SbDirentFileType
+ }
+ if f.Recovery {
+ res |= SbRecovery
+ }
+ if f.JournalDev {
+ res |= SbJournalDev
+ }
+ if f.MetaBG {
+ res |= SbMetaBG
+ }
+ if f.Extents {
+ res |= SbExtents
+ }
+ if f.Is64Bit {
+ res |= SbIs64Bit
+ }
+ if f.MMP {
+ res |= SbMMP
+ }
+ if f.FlexBg {
+ res |= SbFlexBg
+ }
+ if f.LargeDir {
+ res |= SbLargeDir
+ }
+ if f.InlineData {
+ res |= SbInlineData
+ }
+ if f.Encrypted {
+ res |= SbEncrypted
+ }
+
+ return res
+}
+
+// IncompatFeaturesFromInt converts the integer representation of superblock
+// incompatible features to IncompatFeatures struct.
+func IncompatFeaturesFromInt(f uint32) IncompatFeatures {
+ return IncompatFeatures{
+ DirentFileType: f&SbDirentFileType > 0,
+ Recovery: f&SbRecovery > 0,
+ JournalDev: f&SbJournalDev > 0,
+ MetaBG: f&SbMetaBG > 0,
+ Extents: f&SbExtents > 0,
+ Is64Bit: f&SbIs64Bit > 0,
+ MMP: f&SbMMP > 0,
+ FlexBg: f&SbFlexBg > 0,
+ LargeDir: f&SbLargeDir > 0,
+ InlineData: f&SbInlineData > 0,
+ Encrypted: f&SbEncrypted > 0,
+ }
+}
+
+// Superblock readonly compatible features.
+// This is not exhaustive, unused features are not listed.
+const (
+ // SbSparse indicates sparse superblocks. Only groups with number either 0 or
+ // a power of 3, 5, or 7 will have redundant copies of the superblock and
+ // block descriptors.
+ SbSparse = 0x1
+
+ // SbLargeFile indicates that this fs has been used to store a file >= 2GiB.
+ SbLargeFile = 0x2
+
+ // SbHugeFile indicates that this fs contains files whose sizes are
+ // represented in units of logicals blocks, not 512-byte sectors.
+ SbHugeFile = 0x8
+
+ // SbGdtCsum indicates that group descriptors have checksums.
+ SbGdtCsum = 0x10
+
+ // SbDirNlink indicates that the new subdirectory limit is 64,999. Ext3 has a
+ // 32,000 subdirectory limit.
+ SbDirNlink = 0x20
+
+ // SbExtraIsize indicates that large inodes exist on this filesystem.
+ SbExtraIsize = 0x40
+
+ // SbHasSnapshot indicates the existence of a snapshot.
+ SbHasSnapshot = 0x80
+
+ // SbQuota enables usage tracking for all quota types.
+ SbQuota = 0x100
+
+ // SbBigalloc maps to the bigalloc feature. When set, the minimum allocation
+ // unit becomes a cluster rather than a data block. Then block bitmaps track
+ // clusters, not data blocks.
+ //
+ // See https://www.kernel.org/doc/html/latest/filesystems/ext4/overview.html#bigalloc.
+ SbBigalloc = 0x200
+
+ // SbMetadataCsum indicates that the fs supports metadata checksumming.
+ SbMetadataCsum = 0x400
+
+ // SbReadOnly marks this filesystem as readonly. Should refuse to mount in
+ // read/write mode.
+ SbReadOnly = 0x1000
+)
+
+// RoCompatFeatures represents a superblock's readonly compatible feature set.
+// If the kernel does not understand any of these feature, it can still mount
+// readonly. But if the user wants to mount read/write, the kernel should
+// refuse to mount.
+type RoCompatFeatures struct {
+ Sparse bool
+ LargeFile bool
+ HugeFile bool
+ GdtCsum bool
+ DirNlink bool
+ ExtraIsize bool
+ HasSnapshot bool
+ Quota bool
+ Bigalloc bool
+ MetadataCsum bool
+ ReadOnly bool
+}
+
+// ToInt converts superblock readonly compatible features to its 32-bit rep.
+func (f RoCompatFeatures) ToInt() uint32 {
+ var res uint32
+
+ if f.Sparse {
+ res |= SbSparse
+ }
+ if f.LargeFile {
+ res |= SbLargeFile
+ }
+ if f.HugeFile {
+ res |= SbHugeFile
+ }
+ if f.GdtCsum {
+ res |= SbGdtCsum
+ }
+ if f.DirNlink {
+ res |= SbDirNlink
+ }
+ if f.ExtraIsize {
+ res |= SbExtraIsize
+ }
+ if f.HasSnapshot {
+ res |= SbHasSnapshot
+ }
+ if f.Quota {
+ res |= SbQuota
+ }
+ if f.Bigalloc {
+ res |= SbBigalloc
+ }
+ if f.MetadataCsum {
+ res |= SbMetadataCsum
+ }
+ if f.ReadOnly {
+ res |= SbReadOnly
+ }
+
+ return res
+}
+
+// RoCompatFeaturesFromInt converts the integer representation of superblock
+// readonly compatible features to RoCompatFeatures struct.
+func RoCompatFeaturesFromInt(f uint32) RoCompatFeatures {
+ return RoCompatFeatures{
+ Sparse: f&SbSparse > 0,
+ LargeFile: f&SbLargeFile > 0,
+ HugeFile: f&SbHugeFile > 0,
+ GdtCsum: f&SbGdtCsum > 0,
+ DirNlink: f&SbDirNlink > 0,
+ ExtraIsize: f&SbExtraIsize > 0,
+ HasSnapshot: f&SbHasSnapshot > 0,
+ Quota: f&SbQuota > 0,
+ Bigalloc: f&SbBigalloc > 0,
+ MetadataCsum: f&SbMetadataCsum > 0,
+ ReadOnly: f&SbReadOnly > 0,
+ }
+}
diff --git a/pkg/sentry/fs/ext/disklayout/superblock_32.go b/pkg/sentry/fs/ext/disklayout/superblock_32.go
new file mode 100644
index 000000000..587e4afaa
--- /dev/null
+++ b/pkg/sentry/fs/ext/disklayout/superblock_32.go
@@ -0,0 +1,75 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package disklayout
+
+// SuperBlock32Bit implements SuperBlock and represents the 32-bit version of
+// the ext4_super_block struct in fs/ext4/ext4.h.
+type SuperBlock32Bit struct {
+ // We embed the old superblock struct here because the 32-bit version is just
+ // an extension of the old version.
+ SuperBlockOld
+
+ FirstInode uint32
+ InodeSizeRaw uint16
+ BlockGroupNumber uint16
+ FeatureCompat uint32
+ FeatureIncompat uint32
+ FeatureRoCompat uint32
+ UUID [16]byte
+ VolumeName [16]byte
+ LastMounted [64]byte
+ AlgoUsageBitmap uint32
+ PreallocBlocks uint8
+ PreallocDirBlocks uint8
+ ReservedGdtBlocks uint16
+ JournalUUID [16]byte
+ JournalInum uint32
+ JournalDev uint32
+ LastOrphan uint32
+ HashSeed [4]uint32
+ DefaultHashVersion uint8
+ JnlBackupType uint8
+ BgDescSizeRaw uint16
+ DefaultMountOpts uint32
+ FirstMetaBg uint32
+ MkfsTime uint32
+ JnlBlocks [17]uint32
+}
+
+// Compiles only if SuperBlock32Bit implements SuperBlock.
+var _ SuperBlock = (*SuperBlock32Bit)(nil)
+
+// Only override methods which change based on the additional fields above.
+// Not overriding SuperBlock.BgDescSize because it would still return 32 here.
+
+// InodeSize implements SuperBlock.InodeSize.
+func (sb *SuperBlock32Bit) InodeSize() uint16 {
+ return sb.InodeSizeRaw
+}
+
+// CompatibleFeatures implements SuperBlock.CompatibleFeatures.
+func (sb *SuperBlock32Bit) CompatibleFeatures() CompatFeatures {
+ return CompatFeaturesFromInt(sb.FeatureCompat)
+}
+
+// IncompatibleFeatures implements SuperBlock.IncompatibleFeatures.
+func (sb *SuperBlock32Bit) IncompatibleFeatures() IncompatFeatures {
+ return IncompatFeaturesFromInt(sb.FeatureIncompat)
+}
+
+// ReadOnlyCompatibleFeatures implements SuperBlock.ReadOnlyCompatibleFeatures.
+func (sb *SuperBlock32Bit) ReadOnlyCompatibleFeatures() RoCompatFeatures {
+ return RoCompatFeaturesFromInt(sb.FeatureRoCompat)
+}
diff --git a/pkg/sentry/fs/ext/disklayout/superblock_64.go b/pkg/sentry/fs/ext/disklayout/superblock_64.go
new file mode 100644
index 000000000..a2c2278fb
--- /dev/null
+++ b/pkg/sentry/fs/ext/disklayout/superblock_64.go
@@ -0,0 +1,94 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package disklayout
+
+// SuperBlock64Bit implements SuperBlock and represents the 64-bit version of
+// the ext4_super_block struct in fs/ext4/ext4.h. This sums up to be exactly
+// 1024 bytes (smallest possible block size) and hence the superblock always
+// fits in no more than one data block.
+type SuperBlock64Bit struct {
+ // We embed the 32-bit struct here because 64-bit version is just an extension
+ // of the 32-bit version.
+ SuperBlock32Bit
+
+ BlocksCountHi uint32
+ ReservedBlocksCountHi uint32
+ FreeBlocksCountHi uint32
+ MinInodeSize uint16
+ WantInodeSize uint16
+ Flags uint32
+ RaidStride uint16
+ MmpInterval uint16
+ MmpBlock uint64
+ RaidStripeWidth uint32
+ LogGroupsPerFlex uint8
+ ChecksumType uint8
+ _ uint16
+ KbytesWritten uint64
+ SnapshotInum uint32
+ SnapshotID uint32
+ SnapshotRsrvBlocksCount uint64
+ SnapshotList uint32
+ ErrorCount uint32
+ FirstErrorTime uint32
+ FirstErrorInode uint32
+ FirstErrorBlock uint64
+ FirstErrorFunction [32]byte
+ FirstErrorLine uint32
+ LastErrorTime uint32
+ LastErrorInode uint32
+ LastErrorLine uint32
+ LastErrorBlock uint64
+ LastErrorFunction [32]byte
+ MountOpts [64]byte
+ UserQuotaInum uint32
+ GroupQuotaInum uint32
+ OverheadBlocks uint32
+ BackupBgs [2]uint32
+ EncryptAlgos [4]uint8
+ EncryptPwSalt [16]uint8
+ LostFoundInode uint32
+ ProjectQuotaInode uint32
+ ChecksumSeed uint32
+ WtimeHi uint8
+ MtimeHi uint8
+ MkfsTimeHi uint8
+ LastCheckHi uint8
+ FirstErrorTimeHi uint8
+ LastErrorTimeHi uint8
+ _ [2]uint8
+ Encoding uint16
+ EncodingFlags uint16
+ _ [95]uint32
+ Checksum uint32
+}
+
+// Compiles only if SuperBlock64Bit implements SuperBlock.
+var _ SuperBlock = (*SuperBlock64Bit)(nil)
+
+// Only override methods which change based on the 64-bit feature.
+
+// BlocksCount implements SuperBlock.BlocksCount.
+func (sb *SuperBlock64Bit) BlocksCount() uint64 {
+ return (uint64(sb.BlocksCountHi) << 32) | uint64(sb.BlocksCountLo)
+}
+
+// FreeBlocksCount implements SuperBlock.FreeBlocksCount.
+func (sb *SuperBlock64Bit) FreeBlocksCount() uint64 {
+ return (uint64(sb.FreeBlocksCountHi) << 32) | uint64(sb.FreeBlocksCountLo)
+}
+
+// BgDescSize implements SuperBlock.BgDescSize.
+func (sb *SuperBlock64Bit) BgDescSize() uint16 { return sb.BgDescSizeRaw }
diff --git a/pkg/sentry/fs/ext/disklayout/superblock_old.go b/pkg/sentry/fs/ext/disklayout/superblock_old.go
new file mode 100644
index 000000000..c74953610
--- /dev/null
+++ b/pkg/sentry/fs/ext/disklayout/superblock_old.go
@@ -0,0 +1,102 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package disklayout
+
+// SuperBlockOld implements SuperBlock and represents the old version of the
+// superblock struct in ext2 and ext3 systems.
+type SuperBlockOld struct {
+ InodesCountRaw uint32
+ BlocksCountLo uint32
+ ReservedBlocksCount uint32
+ FreeBlocksCountLo uint32
+ FreeInodesCountRaw uint32
+ FirstDataBlockRaw uint32
+ LogBlockSize uint32
+ LogClusterSize uint32
+ BlocksPerGroupRaw uint32
+ ClustersPerGroupRaw uint32
+ InodesPerGroupRaw uint32
+ Mtime uint32
+ Wtime uint32
+ MountCountRaw uint16
+ MaxMountCountRaw uint16
+ MagicRaw uint16
+ State uint16
+ Errors uint16
+ MinorRevLevel uint16
+ LastCheck uint32
+ CheckInterval uint32
+ CreatorOS uint32
+ RevLevel uint32
+ DefResUID uint16
+ DefResGID uint16
+}
+
+// InodesCount implements SuperBlock.InodesCount.
+func (sb *SuperBlockOld) InodesCount() uint32 { return sb.InodesCountRaw }
+
+// BlocksCount implements SuperBlock.BlocksCount.
+func (sb *SuperBlockOld) BlocksCount() uint64 { return uint64(sb.BlocksCountLo) }
+
+// FreeBlocksCount implements SuperBlock.FreeBlocksCount.
+func (sb *SuperBlockOld) FreeBlocksCount() uint64 { return uint64(sb.FreeBlocksCountLo) }
+
+// FreeInodesCount implements SuperBlock.FreeInodesCount.
+func (sb *SuperBlockOld) FreeInodesCount() uint32 { return sb.FreeInodesCountRaw }
+
+// MountCount implements SuperBlock.MountCount.
+func (sb *SuperBlockOld) MountCount() uint16 { return sb.MountCountRaw }
+
+// MaxMountCount implements SuperBlock.MaxMountCount.
+func (sb *SuperBlockOld) MaxMountCount() uint16 { return sb.MaxMountCountRaw }
+
+// FirstDataBlock implements SuperBlock.FirstDataBlock.
+func (sb *SuperBlockOld) FirstDataBlock() uint32 { return sb.FirstDataBlockRaw }
+
+// BlockSize implements SuperBlock.BlockSize.
+func (sb *SuperBlockOld) BlockSize() uint64 { return 1 << (10 + sb.LogBlockSize) }
+
+// BlocksPerGroup implements SuperBlock.BlocksPerGroup.
+func (sb *SuperBlockOld) BlocksPerGroup() uint32 { return sb.BlocksPerGroupRaw }
+
+// ClusterSize implements SuperBlock.ClusterSize.
+func (sb *SuperBlockOld) ClusterSize() uint64 { return 1 << (10 + sb.LogClusterSize) }
+
+// ClustersPerGroup implements SuperBlock.ClustersPerGroup.
+func (sb *SuperBlockOld) ClustersPerGroup() uint32 { return sb.ClustersPerGroupRaw }
+
+// InodeSize implements SuperBlock.InodeSize.
+func (sb *SuperBlockOld) InodeSize() uint16 { return oldInodeSize }
+
+// InodesPerGroup implements SuperBlock.InodesPerGroup.
+func (sb *SuperBlockOld) InodesPerGroup() uint32 { return sb.InodesPerGroupRaw }
+
+// BgDescSize implements SuperBlock.BgDescSize.
+func (sb *SuperBlockOld) BgDescSize() uint16 { return 32 }
+
+// CompatibleFeatures implements SuperBlock.CompatibleFeatures.
+func (sb *SuperBlockOld) CompatibleFeatures() CompatFeatures { return CompatFeatures{} }
+
+// IncompatibleFeatures implements SuperBlock.IncompatibleFeatures.
+func (sb *SuperBlockOld) IncompatibleFeatures() IncompatFeatures { return IncompatFeatures{} }
+
+// ReadOnlyCompatibleFeatures implements SuperBlock.ReadOnlyCompatibleFeatures.
+func (sb *SuperBlockOld) ReadOnlyCompatibleFeatures() RoCompatFeatures { return RoCompatFeatures{} }
+
+// Magic implements SuperBlock.Magic.
+func (sb *SuperBlockOld) Magic() uint16 { return sb.MagicRaw }
+
+// Revision implements SuperBlock.Revision.
+func (sb *SuperBlockOld) Revision() SbRevision { return SbRevision(sb.RevLevel) }
diff --git a/pkg/sentry/fs/ext/disklayout/superblock_test.go b/pkg/sentry/fs/ext/disklayout/superblock_test.go
new file mode 100644
index 000000000..463b5ba21
--- /dev/null
+++ b/pkg/sentry/fs/ext/disklayout/superblock_test.go
@@ -0,0 +1,27 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package disklayout
+
+import (
+ "testing"
+)
+
+// TestSuperBlockSize tests that the superblock structs are of the correct
+// size.
+func TestSuperBlockSize(t *testing.T) {
+ assertSize(t, SuperBlockOld{}, 84)
+ assertSize(t, SuperBlock32Bit{}, 336)
+ assertSize(t, SuperBlock64Bit{}, 1024)
+}
diff --git a/pkg/sentry/fs/ext/disklayout/test_utils.go b/pkg/sentry/fs/ext/disklayout/test_utils.go
new file mode 100644
index 000000000..9c63f04c0
--- /dev/null
+++ b/pkg/sentry/fs/ext/disklayout/test_utils.go
@@ -0,0 +1,30 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package disklayout
+
+import (
+ "reflect"
+ "testing"
+
+ "gvisor.dev/gvisor/pkg/binary"
+)
+
+func assertSize(t *testing.T, v interface{}, want uintptr) {
+ t.Helper()
+
+ if got := binary.Size(v); got != want {
+ t.Errorf("struct %s should be exactly %d bytes but is %d bytes", reflect.TypeOf(v).Name(), want, got)
+ }
+}
diff --git a/pkg/sentry/fs/ext/ext.go b/pkg/sentry/fs/ext/ext.go
new file mode 100644
index 000000000..7602e2bf0
--- /dev/null
+++ b/pkg/sentry/fs/ext/ext.go
@@ -0,0 +1,49 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package ext implements readonly ext(2/3/4) filesystems.
+package ext
+
+import (
+ "io"
+ "sync"
+
+ "gvisor.dev/gvisor/pkg/sentry/fs/ext/disklayout"
+)
+
+// Filesystem implements vfs.FilesystemImpl.
+type Filesystem struct {
+ // dev is the ReadSeeker for the underlying fs device and is protected by mu.
+ dev io.ReadSeeker
+
+ // mu synchronizes the usage of dev. The ext filesystems take locality into
+ // condsideration, i.e. data blocks of a file will tend to be placed close
+ // together. On a spinning disk, locality reduces the amount of movement of
+ // the head hence speeding up IO operations. On an SSD there are no moving
+ // parts but locality increases the size of each transer request. Hence,
+ // having mutual exclusion on the read seeker while reading a file *should*
+ // help in achieving the intended performance gains.
+ //
+ // Note: This synchronization was not coupled with the ReadSeeker itself
+ // because we want to synchronize across read/seek operations for the
+ // performance gains mentioned above. Helps enforcing one-file-at-a-time IO.
+ mu sync.Mutex
+
+ // sb represents the filesystem superblock. Immutable after initialization.
+ sb disklayout.SuperBlock
+
+ // bgs represents all the block group descriptors for the filesystem.
+ // Immutable after initialization.
+ bgs []disklayout.BlockGroup
+}
diff --git a/pkg/sentry/fs/ext4/BUILD b/pkg/sentry/fs/ext4/BUILD
deleted file mode 100644
index 9df9084c3..000000000
--- a/pkg/sentry/fs/ext4/BUILD
+++ /dev/null
@@ -1,14 +0,0 @@
-package(licenses = ["notice"])
-
-load("//tools/go_stateify:defs.bzl", "go_library")
-
-go_library(
- name = "ext4",
- srcs = ["fs.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ext4",
- visibility = ["//pkg/sentry:internal"],
- deps = [
- "//pkg/sentry/context",
- "//pkg/sentry/fs",
- ],
-)
diff --git a/pkg/sentry/fs/ext4/fs.go b/pkg/sentry/fs/ext4/fs.go
deleted file mode 100644
index de5f0ef63..000000000
--- a/pkg/sentry/fs/ext4/fs.go
+++ /dev/null
@@ -1,61 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package ext4 implements the ext4 filesystem.
-package ext4
-
-import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
-)
-
-// filesystem implements fs.Filesystem for ext4.
-//
-// +stateify savable
-type filesystem struct{}
-
-func init() {
- fs.RegisterFilesystem(&filesystem{})
-}
-
-// FilesystemName is the name under which the filesystem is registered.
-// Name matches fs/ext4/super.c:ext4_fs_type.name.
-const FilesystemName = "ext4"
-
-// Name is the name of the file system.
-func (*filesystem) Name() string {
- return FilesystemName
-}
-
-// AllowUserMount prohibits users from using mount(2) with this file system.
-func (*filesystem) AllowUserMount() bool {
- return false
-}
-
-// AllowUserList prohibits this filesystem to be listed in /proc/filesystems.
-func (*filesystem) AllowUserList() bool {
- return false
-}
-
-// Flags returns properties of the filesystem.
-//
-// In Linux, ext4 returns FS_REQUIRES_DEV. See fs/ext4/super.c
-func (*filesystem) Flags() fs.FilesystemFlags {
- return fs.FilesystemRequiresDev
-}
-
-// Mount returns the root inode of the ext4 fs.
-func (f *filesystem) Mount(ctx context.Context, device string, flags fs.MountSourceFlags, data string, cgroupsInt interface{}) (*fs.Inode, error) {
- panic("unimplemented")
-}
diff --git a/pkg/sentry/fs/fdpipe/BUILD b/pkg/sentry/fs/fdpipe/BUILD
index 098463e97..bf00b9c09 100644
--- a/pkg/sentry/fs/fdpipe/BUILD
+++ b/pkg/sentry/fs/fdpipe/BUILD
@@ -9,8 +9,8 @@ go_library(
"pipe_opener.go",
"pipe_state.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fdpipe",
- imports = ["gvisor.googlesource.com/gvisor/pkg/sentry/fs"],
+ importpath = "gvisor.dev/gvisor/pkg/sentry/fs/fdpipe",
+ imports = ["gvisor.dev/gvisor/pkg/sentry/fs"],
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/fd",
diff --git a/pkg/sentry/fs/fdpipe/pipe.go b/pkg/sentry/fs/fdpipe/pipe.go
index 4ef7ea08a..5a0a67eab 100644
--- a/pkg/sentry/fs/fdpipe/pipe.go
+++ b/pkg/sentry/fs/fdpipe/pipe.go
@@ -20,17 +20,17 @@ import (
"sync"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/fd"
- "gvisor.googlesource.com/gvisor/pkg/fdnotifier"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/secio"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/fd"
+ "gvisor.dev/gvisor/pkg/fdnotifier"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/secio"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/safemem"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// pipeOperations are the fs.FileOperations of a host pipe.
diff --git a/pkg/sentry/fs/fdpipe/pipe_opener.go b/pkg/sentry/fs/fdpipe/pipe_opener.go
index 0cabe2e18..64b558975 100644
--- a/pkg/sentry/fs/fdpipe/pipe_opener.go
+++ b/pkg/sentry/fs/fdpipe/pipe_opener.go
@@ -20,10 +20,10 @@ import (
"syscall"
"time"
- "gvisor.googlesource.com/gvisor/pkg/fd"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/fd"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// NonBlockingOpener is a generic host file opener used to retry opening host
diff --git a/pkg/sentry/fs/fdpipe/pipe_opener_test.go b/pkg/sentry/fs/fdpipe/pipe_opener_test.go
index 8c8b1b40c..8e4d839e1 100644
--- a/pkg/sentry/fs/fdpipe/pipe_opener_test.go
+++ b/pkg/sentry/fs/fdpipe/pipe_opener_test.go
@@ -25,12 +25,12 @@ import (
"time"
"github.com/google/uuid"
- "gvisor.googlesource.com/gvisor/pkg/fd"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/fd"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
type hostOpener struct {
@@ -359,7 +359,7 @@ func TestCopiedReadAheadBuffer(t *testing.T) {
inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{
Type: fs.Pipe,
})
- file := fs.NewFile(ctx, fs.NewDirent(inode, "pipe"), fs.FileFlags{Read: true}, pipeOps)
+ file := fs.NewFile(ctx, fs.NewDirent(ctx, inode, "pipe"), fs.FileFlags{Read: true}, pipeOps)
// Check that the file we opened points to a pipe with a non-empty read ahead buffer.
bufsize := len(pipeOps.readAheadBuffer)
diff --git a/pkg/sentry/fs/fdpipe/pipe_state.go b/pkg/sentry/fs/fdpipe/pipe_state.go
index 8b347aa11..29175fb3d 100644
--- a/pkg/sentry/fs/fdpipe/pipe_state.go
+++ b/pkg/sentry/fs/fdpipe/pipe_state.go
@@ -19,8 +19,8 @@ import (
"io/ioutil"
"sync"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
)
// beforeSave is invoked by stateify.
diff --git a/pkg/sentry/fs/fdpipe/pipe_test.go b/pkg/sentry/fs/fdpipe/pipe_test.go
index b59a6aa0e..69abc1e71 100644
--- a/pkg/sentry/fs/fdpipe/pipe_test.go
+++ b/pkg/sentry/fs/fdpipe/pipe_test.go
@@ -21,12 +21,12 @@ import (
"syscall"
"testing"
- "gvisor.googlesource.com/gvisor/pkg/fd"
- "gvisor.googlesource.com/gvisor/pkg/fdnotifier"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/fd"
+ "gvisor.dev/gvisor/pkg/fdnotifier"
+ "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
func singlePipeFD() (int, error) {
@@ -50,11 +50,11 @@ func mockPipeDirent(t *testing.T) *fs.Dirent {
User: fs.PermMask{Read: true, Write: true},
},
}
- inode := fs.NewInode(node, fs.NewMockMountSource(nil), fs.StableAttr{
+ inode := fs.NewInode(ctx, node, fs.NewMockMountSource(nil), fs.StableAttr{
Type: fs.Pipe,
BlockSize: usermem.PageSize,
})
- return fs.NewDirent(inode, "")
+ return fs.NewDirent(ctx, inode, "")
}
func TestNewPipe(t *testing.T) {
@@ -285,7 +285,7 @@ func TestPipeRequest(t *testing.T) {
defer p.Release()
inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{Type: fs.Pipe})
- file := fs.NewFile(ctx, fs.NewDirent(inode, "pipe"), fs.FileFlags{Read: true}, p)
+ file := fs.NewFile(ctx, fs.NewDirent(ctx, inode, "pipe"), fs.FileFlags{Read: true}, p)
// Issue request via the appropriate function.
switch c := test.context.(type) {
@@ -339,7 +339,7 @@ func TestPipeReadAheadBuffer(t *testing.T) {
inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{
Type: fs.Pipe,
})
- file := fs.NewFile(ctx, fs.NewDirent(inode, "pipe"), fs.FileFlags{Read: true}, p)
+ file := fs.NewFile(ctx, fs.NewDirent(ctx, inode, "pipe"), fs.FileFlags{Read: true}, p)
// In total we expect to read data + buffered.
total := append(buffered, data...)
@@ -358,9 +358,9 @@ func TestPipeReadAheadBuffer(t *testing.T) {
}
}
-// This is very important for pipes in general because they can return EWOULDBLOCK and for
-// those that block they must continue until they have read all of the data (and report it
-// as such.
+// This is very important for pipes in general because they can return
+// EWOULDBLOCK and for those that block they must continue until they have read
+// all of the data (and report it as such).
func TestPipeReadsAccumulate(t *testing.T) {
fds := make([]int, 2)
if err := syscall.Pipe(fds); err != nil {
@@ -385,7 +385,7 @@ func TestPipeReadsAccumulate(t *testing.T) {
inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{
Type: fs.Pipe,
})
- file := fs.NewFile(ctx, fs.NewDirent(inode, "pipe"), fs.FileFlags{Read: true}, p)
+ file := fs.NewFile(ctx, fs.NewDirent(ctx, inode, "pipe"), fs.FileFlags{Read: true}, p)
// Write some some bytes to the pipe.
data := []byte("some message")
@@ -393,8 +393,8 @@ func TestPipeReadsAccumulate(t *testing.T) {
t.Fatalf("write to pipe got (%d, %v), want (%d, nil)", n, err, len(data))
}
- // Construct a segment vec that is a bit more than we have written so we trigger
- // an EWOULDBLOCK.
+ // Construct a segment vec that is a bit more than we have written so we
+ // trigger an EWOULDBLOCK.
wantBytes := len(data) + 1
readBuffer := make([]byte, wantBytes)
iov := usermem.BytesIOSequence(readBuffer)
@@ -446,41 +446,57 @@ func TestPipeWritesAccumulate(t *testing.T) {
wfile.Close()
t.Fatalf("newPipeOperations got error %v, want nil", err)
}
- // Don't forget to remove the fd from the fd notifier. Otherwise other tests will
- // likely be borked, because it's global :(
+ // Don't forget to remove the fd from the fd notifier. Otherwise other tests
+ // will likely be borked, because it's global :(
defer p.Release()
inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{
Type: fs.Pipe,
})
- file := fs.NewFile(ctx, fs.NewDirent(inode, "pipe"), fs.FileFlags{Read: true}, p)
+ file := fs.NewFile(ctx, fs.NewDirent(ctx, inode, "pipe"), fs.FileFlags{Read: true}, p)
+
+ pipeSize, _, errno := syscall.Syscall(syscall.SYS_FCNTL, uintptr(wfile.FD()), syscall.F_GETPIPE_SZ, 0)
+ if errno != 0 {
+ t.Fatalf("fcntl(F_GETPIPE_SZ) failed: %v", errno)
+ }
+ t.Logf("Pipe buffer size: %d", pipeSize)
- // Construct a segment vec that is larger than the pipe size to trigger an EWOULDBLOCK.
- wantBytes := 65536 * 2
+ // Construct a segment vec that is larger than the pipe size to trigger an
+ // EWOULDBLOCK.
+ wantBytes := int(pipeSize) * 2
writeBuffer := make([]byte, wantBytes)
for i := 0; i < wantBytes; i++ {
writeBuffer[i] = 'a'
}
iov := usermem.BytesIOSequence(writeBuffer)
n, err := p.Write(ctx, file, iov, 0)
- total := n
- iov = iov.DropFirst64(n)
if err != syserror.ErrWouldBlock {
t.Fatalf("Writev got error %v, want %v", err, syserror.ErrWouldBlock)
}
+ if n != int64(pipeSize) {
+ t.Fatalf("Writev partial write, got: %v, want %v", n, pipeSize)
+ }
+ total := n
+ iov = iov.DropFirst64(n)
// Read the entire pipe buf size to make space for the second half.
- throwAway := make([]byte, 65536)
- if n, err := syscall.Read(fds[0], throwAway); n != len(throwAway) || err != nil {
- t.Fatalf("write to pipe got (%d, %v), want (%d, nil)", n, err, len(throwAway))
+ readBuffer := make([]byte, n)
+ if n, err := syscall.Read(fds[0], readBuffer); n != len(readBuffer) || err != nil {
+ t.Fatalf("write to pipe got (%d, %v), want (%d, nil)", n, err, len(readBuffer))
+ }
+ if !bytes.Equal(readBuffer, writeBuffer[:len(readBuffer)]) {
+ t.Fatalf("wrong data read from pipe, got: %v, want: %v", readBuffer, writeBuffer)
}
// This time we should not block.
n, err = p.Write(ctx, file, iov, 0)
- total += n
if err != nil {
t.Fatalf("Writev got error %v, want nil", err)
}
+ if n != int64(pipeSize) {
+ t.Fatalf("Writev partial write, got: %v, want %v", n, pipeSize)
+ }
+ total += n
// Assert that the result we got back is cumulative.
if total != int64(wantBytes) {
diff --git a/pkg/sentry/fs/file.go b/pkg/sentry/fs/file.go
index f64954457..bb8117f89 100644
--- a/pkg/sentry/fs/file.go
+++ b/pkg/sentry/fs/file.go
@@ -20,17 +20,17 @@ import (
"sync/atomic"
"time"
- "gvisor.googlesource.com/gvisor/pkg/amutex"
- "gvisor.googlesource.com/gvisor/pkg/metric"
- "gvisor.googlesource.com/gvisor/pkg/refs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/lock"
- "gvisor.googlesource.com/gvisor/pkg/sentry/limits"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/uniqueid"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/amutex"
+ "gvisor.dev/gvisor/pkg/metric"
+ "gvisor.dev/gvisor/pkg/refs"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs/lock"
+ "gvisor.dev/gvisor/pkg/sentry/limits"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/uniqueid"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/waiter"
)
var (
@@ -130,14 +130,15 @@ type File struct {
// to false respectively.
func NewFile(ctx context.Context, dirent *Dirent, flags FileFlags, fops FileOperations) *File {
dirent.IncRef()
- f := &File{
+ f := File{
UniqueID: uniqueid.GlobalFromContext(ctx),
Dirent: dirent,
FileOperations: fops,
flags: flags,
}
f.mu.Init()
- return f
+ f.EnableLeakCheck("fs.File")
+ return &f
}
// DecRef destroys the File when it is no longer referenced.
@@ -267,7 +268,7 @@ func (f *File) Readv(ctx context.Context, dst usermem.IOSequence) (int64, error)
reads.Increment()
n, err := f.FileOperations.Read(ctx, f, dst, f.offset)
- if n > 0 {
+ if n > 0 && !f.flags.NonSeekable {
atomic.AddInt64(&f.offset, n)
}
f.mu.Unlock()
@@ -310,9 +311,11 @@ func (f *File) Writev(ctx context.Context, src usermem.IOSequence) (int64, error
return 0, syserror.ErrInterrupted
}
+ unlockAppendMu := f.Dirent.Inode.lockAppendMu(f.Flags().Append)
// Handle append mode.
if f.Flags().Append {
if err := f.offsetForAppend(ctx, &f.offset); err != nil {
+ unlockAppendMu()
f.mu.Unlock()
return 0, err
}
@@ -322,6 +325,7 @@ func (f *File) Writev(ctx context.Context, src usermem.IOSequence) (int64, error
limit, ok := f.checkLimit(ctx, f.offset)
switch {
case ok && limit == 0:
+ unlockAppendMu()
f.mu.Unlock()
return 0, syserror.ErrExceedsFileSizeLimit
case ok:
@@ -330,9 +334,10 @@ func (f *File) Writev(ctx context.Context, src usermem.IOSequence) (int64, error
// We must hold the lock during the write.
n, err := f.FileOperations.Write(ctx, f, src, f.offset)
- if n >= 0 {
+ if n >= 0 && !f.flags.NonSeekable {
atomic.StoreInt64(&f.offset, f.offset+n)
}
+ unlockAppendMu()
f.mu.Unlock()
return n, err
}
@@ -348,13 +353,11 @@ func (f *File) Pwritev(ctx context.Context, src usermem.IOSequence, offset int64
// However, on Linux, if a file is opened with O_APPEND, pwrite()
// appends data to the end of the file, regardless of the value of
// offset."
+ unlockAppendMu := f.Dirent.Inode.lockAppendMu(f.Flags().Append)
+ defer unlockAppendMu()
+
if f.Flags().Append {
- if !f.mu.Lock(ctx) {
- return 0, syserror.ErrInterrupted
- }
- defer f.mu.Unlock()
if err := f.offsetForAppend(ctx, &offset); err != nil {
- f.mu.Unlock()
return 0, err
}
}
@@ -373,7 +376,7 @@ func (f *File) Pwritev(ctx context.Context, src usermem.IOSequence, offset int64
// offsetForAppend sets the given offset to the end of the file.
//
-// Precondition: the underlying file mutex should be held.
+// Precondition: the file.Dirent.Inode.appendMu mutex should be held for writing.
func (f *File) offsetForAppend(ctx context.Context, offset *int64) error {
uattr, err := f.Dirent.Inode.UnstableAttr(ctx)
if err != nil {
diff --git a/pkg/sentry/fs/file_operations.go b/pkg/sentry/fs/file_operations.go
index 0f2dfa273..d86f5bf45 100644
--- a/pkg/sentry/fs/file_operations.go
+++ b/pkg/sentry/fs/file_operations.go
@@ -15,11 +15,11 @@
package fs
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// SpliceOpts define how a splice works.
@@ -155,5 +155,16 @@ type FileOperations interface {
// refer.
//
// Preconditions: The AddressSpace (if any) that io refers to is activated.
- Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error)
+ Ioctl(ctx context.Context, file *File, io usermem.IO, args arch.SyscallArguments) (uintptr, error)
+}
+
+// FifoSizer is an interface for setting and getting the size of a pipe.
+type FifoSizer interface {
+ // FifoSize returns the pipe capacity in bytes.
+ FifoSize(ctx context.Context, file *File) (int64, error)
+
+ // SetFifoSize sets the new pipe capacity in bytes.
+ //
+ // The new size is returned (which may be capped).
+ SetFifoSize(size int64) (int64, error)
}
diff --git a/pkg/sentry/fs/file_overlay.go b/pkg/sentry/fs/file_overlay.go
index 273de1e14..9820f0b13 100644
--- a/pkg/sentry/fs/file_overlay.go
+++ b/pkg/sentry/fs/file_overlay.go
@@ -17,13 +17,13 @@ package fs
import (
"sync"
- "gvisor.googlesource.com/gvisor/pkg/refs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/refs"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// overlayFile gets a handle to a file from the upper or lower filesystem
@@ -85,12 +85,6 @@ type overlayFileOperations struct {
// protected by File.mu of the owning file, which is held during
// Readdir and Seek calls.
dirCursor string
-
- // dirCacheMu protects dirCache.
- dirCacheMu sync.RWMutex `state:"nosave"`
-
- // dirCache is cache of DentAttrs from upper and lower Inodes.
- dirCache *SortedDentryMap
}
// Release implements FileOperations.Release.
@@ -171,53 +165,68 @@ func (f *overlayFileOperations) Readdir(ctx context.Context, file *File, seriali
if root != nil {
defer root.DecRef()
}
+
dirCtx := &DirCtx{
Serializer: serializer,
DirCursor: &f.dirCursor,
}
+ return DirentReaddir(ctx, file.Dirent, f, root, dirCtx, file.Offset())
+}
- // If the directory dirent is frozen, then DirentReaddir will calculate
- // the children based off the frozen dirent tree. There is no need to
- // call readdir on the upper/lower layers.
- if file.Dirent.frozen {
- return DirentReaddir(ctx, file.Dirent, f, root, dirCtx, file.Offset())
+// IterateDir implements DirIterator.IterateDir.
+func (f *overlayFileOperations) IterateDir(ctx context.Context, d *Dirent, dirCtx *DirCtx, offset int) (int, error) {
+ o := d.Inode.overlay
+
+ if !d.Inode.MountSource.CacheReaddir() {
+ // Can't use the dirCache. Simply read the entries.
+ entries, err := readdirEntries(ctx, o)
+ if err != nil {
+ return offset, err
+ }
+ n, err := GenericReaddir(dirCtx, entries)
+ return offset + n, err
}
- // Otherwise proceed with usual overlay readdir.
- o := file.Dirent.Inode.overlay
+ // Otherwise, use or create cached entries.
+
+ o.dirCacheMu.RLock()
+ if o.dirCache != nil {
+ n, err := GenericReaddir(dirCtx, o.dirCache)
+ o.dirCacheMu.RUnlock()
+ return offset + n, err
+ }
+ o.dirCacheMu.RUnlock()
// readdirEntries holds o.copyUpMu to ensure that copy-up does not
- // occur while calculating the readir results.
+ // occur while calculating the readdir results.
//
// However, it is possible for a copy-up to occur after the call to
- // readdirEntries, but before setting f.dirCache. This is OK, since
- // copy-up only does not change the children in a way that would affect
- // the children returned in dirCache. Copy-up only moves
- // files/directories between layers in the overlay.
+ // readdirEntries, but before setting o.dirCache. This is OK, since
+ // copy-up does not change the children in a way that would affect the
+ // children returned in dirCache. Copy-up only moves files/directories
+ // between layers in the overlay.
//
- // It is also possible for Readdir to race with a Create operation
- // (which may trigger a copy-up during it's execution). Depending on
- // whether the Create happens before or after the readdirEntries call,
- // the newly created file may or may not appear in the readdir results.
- // But this can only be caused by a real race between readdir and
- // create syscalls, so it's also OK.
- dirCache, err := readdirEntries(ctx, o)
- if err != nil {
- return file.Offset(), err
+ // We must hold dirCacheMu around both readdirEntries and setting
+ // o.dirCache to synchronize with dirCache invalidations done by
+ // Create, Remove, Rename.
+ o.dirCacheMu.Lock()
+
+ // We expect dirCache to be nil (we just checked above), but there is a
+ // chance that a racing call managed to just set it, in which case we
+ // can use that new value.
+ if o.dirCache == nil {
+ dirCache, err := readdirEntries(ctx, o)
+ if err != nil {
+ o.dirCacheMu.Unlock()
+ return offset, err
+ }
+ o.dirCache = dirCache
}
- f.dirCacheMu.Lock()
- f.dirCache = dirCache
- f.dirCacheMu.Unlock()
+ o.dirCacheMu.DowngradeLock()
+ n, err := GenericReaddir(dirCtx, o.dirCache)
+ o.dirCacheMu.RUnlock()
- return DirentReaddir(ctx, file.Dirent, f, root, dirCtx, file.Offset())
-}
-
-// IterateDir implements DirIterator.IterateDir.
-func (f *overlayFileOperations) IterateDir(ctx context.Context, dirCtx *DirCtx, offset int) (int, error) {
- f.dirCacheMu.RLock()
- n, err := GenericReaddir(dirCtx, f.dirCache)
- f.dirCacheMu.RUnlock()
return offset + n, err
}
@@ -338,13 +347,14 @@ func (*overlayFileOperations) ConfigureMMap(ctx context.Context, file *File, opt
// preventing us from saving a proper inode mapping for the
// file.
file.IncRef()
- id := &overlayMappingIdentity{
+ id := overlayMappingIdentity{
id: opts.MappingIdentity,
overlayFile: file,
}
+ id.EnableLeakCheck("fs.overlayMappingIdentity")
// Swap out the old MappingIdentity for the wrapped one.
- opts.MappingIdentity = id
+ opts.MappingIdentity = &id
return nil
}
@@ -388,9 +398,49 @@ func (f *overlayFileOperations) UnstableAttr(ctx context.Context, file *File) (U
return f.lower.UnstableAttr(ctx)
}
-// Ioctl implements fs.FileOperations.Ioctl and always returns ENOTTY.
-func (*overlayFileOperations) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
- return 0, syserror.ENOTTY
+// Ioctl implements fs.FileOperations.Ioctl.
+func (f *overlayFileOperations) Ioctl(ctx context.Context, overlayFile *File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
+ f.upperMu.Lock()
+ defer f.upperMu.Unlock()
+
+ if f.upper == nil {
+ // It's possible that ioctl changes the file. Since we don't know all
+ // possible ioctls, only allow them to propagate to the upper. Triggering a
+ // copy up on any ioctl would be too drastic. In the future, it can have a
+ // list of ioctls that are safe to send to lower and a list that triggers a
+ // copy up.
+ return 0, syserror.ENOTTY
+ }
+ return f.upper.FileOperations.Ioctl(ctx, f.upper, io, args)
+}
+
+// FifoSize implements FifoSizer.FifoSize.
+func (f *overlayFileOperations) FifoSize(ctx context.Context, overlayFile *File) (rv int64, err error) {
+ err = f.onTop(ctx, overlayFile, func(file *File, ops FileOperations) error {
+ sz, ok := ops.(FifoSizer)
+ if !ok {
+ return syserror.EINVAL
+ }
+ rv, err = sz.FifoSize(ctx, file)
+ return err
+ })
+ return
+}
+
+// SetFifoSize implements FifoSizer.SetFifoSize.
+func (f *overlayFileOperations) SetFifoSize(size int64) (rv int64, err error) {
+ f.upperMu.Lock()
+ defer f.upperMu.Unlock()
+
+ if f.upper == nil {
+ // Named pipes cannot be copied up and changes to the lower are prohibited.
+ return 0, syserror.EINVAL
+ }
+ sz, ok := f.upper.FileOperations.(FifoSizer)
+ if !ok {
+ return 0, syserror.EINVAL
+ }
+ return sz.SetFifoSize(size)
}
// readdirEntries returns a sorted map of directory entries from the
diff --git a/pkg/sentry/fs/file_overlay_test.go b/pkg/sentry/fs/file_overlay_test.go
index 6a2b8007c..2fb824d5c 100644
--- a/pkg/sentry/fs/file_overlay_test.go
+++ b/pkg/sentry/fs/file_overlay_test.go
@@ -18,18 +18,18 @@ import (
"reflect"
"testing"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/fs/ramfs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest"
)
func TestReaddir(t *testing.T) {
ctx := contexttest.Context(t)
ctx = &rootContext{
Context: ctx,
- root: fs.NewDirent(newTestRamfsDir(ctx, nil, nil), "root"),
+ root: fs.NewDirent(ctx, newTestRamfsDir(ctx, nil, nil), "root"),
}
for _, test := range []struct {
// Test description.
@@ -103,7 +103,7 @@ func TestReaddir(t *testing.T) {
},
} {
t.Run(test.desc, func(t *testing.T) {
- openDir, err := test.dir.GetFile(ctx, fs.NewDirent(test.dir, "stub"), fs.FileFlags{Read: true})
+ openDir, err := test.dir.GetFile(ctx, fs.NewDirent(ctx, test.dir, "stub"), fs.FileFlags{Read: true})
if err != nil {
t.Fatalf("GetFile got error %v, want nil", err)
}
@@ -126,7 +126,7 @@ func TestReaddirRevalidation(t *testing.T) {
ctx := contexttest.Context(t)
ctx = &rootContext{
Context: ctx,
- root: fs.NewDirent(newTestRamfsDir(ctx, nil, nil), "root"),
+ root: fs.NewDirent(ctx, newTestRamfsDir(ctx, nil, nil), "root"),
}
// Create an overlay with two directories, each with one file.
@@ -139,7 +139,7 @@ func TestReaddirRevalidation(t *testing.T) {
upperDir := upper.InodeOperations.(*dir).InodeOperations.(*ramfs.Dir)
// Check that overlay returns the files from both upper and lower.
- openDir, err := overlay.GetFile(ctx, fs.NewDirent(overlay, "stub"), fs.FileFlags{Read: true})
+ openDir, err := overlay.GetFile(ctx, fs.NewDirent(ctx, overlay, "stub"), fs.FileFlags{Read: true})
if err != nil {
t.Fatalf("GetFile got error %v, want nil", err)
}
@@ -156,7 +156,7 @@ func TestReaddirRevalidation(t *testing.T) {
if err := upperDir.Remove(ctx, upper, "a"); err != nil {
t.Fatalf("error removing child: %v", err)
}
- upperDir.AddChild(ctx, "c", fs.NewInode(fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermissions{}, 0),
+ upperDir.AddChild(ctx, "c", fs.NewInode(ctx, fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermissions{}, 0),
upper.MountSource, fs.StableAttr{Type: fs.RegularFile}))
// Seek to beginning of the directory and do the readdir again.
@@ -186,7 +186,7 @@ func TestReaddirOverlayFrozen(t *testing.T) {
overlayInode := fs.NewTestOverlayDir(ctx, upper, lower, false)
// Set that overlay as the root.
- root := fs.NewDirent(overlayInode, "root")
+ root := fs.NewDirent(ctx, overlayInode, "root")
ctx = &rootContext{
Context: ctx,
root: root,
diff --git a/pkg/sentry/fs/filesystems.go b/pkg/sentry/fs/filesystems.go
index acd84dfcc..b157fd228 100644
--- a/pkg/sentry/fs/filesystems.go
+++ b/pkg/sentry/fs/filesystems.go
@@ -20,7 +20,7 @@ import (
"strings"
"sync"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/context"
)
// FilesystemFlags matches include/linux/fs.h:file_system_type.fs_flags.
diff --git a/pkg/sentry/fs/filetest/BUILD b/pkg/sentry/fs/filetest/BUILD
index 05ca72aa0..a9d6d9301 100644
--- a/pkg/sentry/fs/filetest/BUILD
+++ b/pkg/sentry/fs/filetest/BUILD
@@ -6,7 +6,7 @@ go_library(
name = "filetest",
testonly = 1,
srcs = ["filetest.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs/filetest",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/fs/filetest",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/sentry/context",
diff --git a/pkg/sentry/fs/filetest/filetest.go b/pkg/sentry/fs/filetest/filetest.go
index c0b1b088d..22270a494 100644
--- a/pkg/sentry/fs/filetest/filetest.go
+++ b/pkg/sentry/fs/filetest/filetest.go
@@ -19,13 +19,13 @@ import (
"fmt"
"testing"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/anon"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/anon"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// TestFileOperations is an implementation of the File interface. It provides all
@@ -46,7 +46,7 @@ type TestFileOperations struct {
// NewTestFile creates and initializes a new test file.
func NewTestFile(tb testing.TB) *fs.File {
ctx := contexttest.Context(tb)
- dirent := fs.NewDirent(anon.NewInode(ctx), "test")
+ dirent := fs.NewDirent(ctx, anon.NewInode(ctx), "test")
return fs.NewFile(ctx, dirent, fs.FileFlags{}, &TestFileOperations{})
}
diff --git a/pkg/sentry/fs/flags.go b/pkg/sentry/fs/flags.go
index 5c8cb773f..1278f9c78 100644
--- a/pkg/sentry/fs/flags.go
+++ b/pkg/sentry/fs/flags.go
@@ -15,7 +15,7 @@
package fs
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/abi/linux"
)
// FileFlags encodes file flags.
@@ -57,6 +57,9 @@ type FileFlags struct {
// Linux sets this flag for all files. Since gVisor is only compatible
// with 64-bit Linux, it also sets this flag for all files.
LargeFile bool
+
+ // NonSeekable indicates that file.offset isn't used.
+ NonSeekable bool
}
// SettableFileFlags is a subset of FileFlags above that can be changed
diff --git a/pkg/sentry/fs/fs.go b/pkg/sentry/fs/fs.go
index 632055cce..8b2a5e6b2 100644
--- a/pkg/sentry/fs/fs.go
+++ b/pkg/sentry/fs/fs.go
@@ -56,8 +56,8 @@ package fs
import (
"sync"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/context"
)
var (
diff --git a/pkg/sentry/fs/fsutil/BUILD b/pkg/sentry/fs/fsutil/BUILD
index 44f43b965..6499f87ac 100644
--- a/pkg/sentry/fs/fsutil/BUILD
+++ b/pkg/sentry/fs/fsutil/BUILD
@@ -7,8 +7,8 @@ go_template_instance(
name = "dirty_set_impl",
out = "dirty_set_impl.go",
imports = {
- "memmap": "gvisor.googlesource.com/gvisor/pkg/sentry/memmap",
- "platform": "gvisor.googlesource.com/gvisor/pkg/sentry/platform",
+ "memmap": "gvisor.dev/gvisor/pkg/sentry/memmap",
+ "platform": "gvisor.dev/gvisor/pkg/sentry/platform",
},
package = "fsutil",
prefix = "Dirty",
@@ -25,7 +25,7 @@ go_template_instance(
name = "frame_ref_set_impl",
out = "frame_ref_set_impl.go",
imports = {
- "platform": "gvisor.googlesource.com/gvisor/pkg/sentry/platform",
+ "platform": "gvisor.dev/gvisor/pkg/sentry/platform",
},
package = "fsutil",
prefix = "frameRef",
@@ -42,8 +42,8 @@ go_template_instance(
name = "file_range_set_impl",
out = "file_range_set_impl.go",
imports = {
- "memmap": "gvisor.googlesource.com/gvisor/pkg/sentry/memmap",
- "platform": "gvisor.googlesource.com/gvisor/pkg/sentry/platform",
+ "memmap": "gvisor.dev/gvisor/pkg/sentry/memmap",
+ "platform": "gvisor.dev/gvisor/pkg/sentry/platform",
},
package = "fsutil",
prefix = "FileRange",
@@ -74,7 +74,7 @@ go_library(
"inode.go",
"inode_cached.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/fs/fsutil",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
diff --git a/pkg/sentry/fs/fsutil/dirty_set.go b/pkg/sentry/fs/fsutil/dirty_set.go
index f1451d77a..12132680b 100644
--- a/pkg/sentry/fs/fsutil/dirty_set.go
+++ b/pkg/sentry/fs/fsutil/dirty_set.go
@@ -17,11 +17,11 @@ package fsutil
import (
"math"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/safemem"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// DirtySet maps offsets into a memmap.Mappable to DirtyInfo. It is used to
diff --git a/pkg/sentry/fs/fsutil/dirty_set_test.go b/pkg/sentry/fs/fsutil/dirty_set_test.go
index d9c68baa3..75575d994 100644
--- a/pkg/sentry/fs/fsutil/dirty_set_test.go
+++ b/pkg/sentry/fs/fsutil/dirty_set_test.go
@@ -18,8 +18,8 @@ import (
"reflect"
"testing"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
func TestDirtySet(t *testing.T) {
diff --git a/pkg/sentry/fs/fsutil/file.go b/pkg/sentry/fs/fsutil/file.go
index 9381963d0..626b9126a 100644
--- a/pkg/sentry/fs/fsutil/file.go
+++ b/pkg/sentry/fs/fsutil/file.go
@@ -15,13 +15,13 @@
package fsutil
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// FileNoopRelease implements fs.FileOperations.Release for files that have no
@@ -219,7 +219,7 @@ func GenericConfigureMMap(file *fs.File, m memmap.Mappable, opts *memmap.MMapOpt
type FileNoIoctl struct{}
// Ioctl implements fs.FileOperations.Ioctl.
-func (FileNoIoctl) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
+func (FileNoIoctl) Ioctl(context.Context, *fs.File, usermem.IO, arch.SyscallArguments) (uintptr, error) {
return 0, syserror.ENOTTY
}
@@ -285,7 +285,7 @@ func NewStaticDirFileOperations(dentries *fs.SortedDentryMap) *StaticDirFileOper
}
// IterateDir implements DirIterator.IterateDir.
-func (sdfo *StaticDirFileOperations) IterateDir(ctx context.Context, dirCtx *fs.DirCtx, offset int) (int, error) {
+func (sdfo *StaticDirFileOperations) IterateDir(ctx context.Context, d *fs.Dirent, dirCtx *fs.DirCtx, offset int) (int, error) {
n, err := fs.GenericReaddir(dirCtx, sdfo.dentryMap)
return offset + n, err
}
diff --git a/pkg/sentry/fs/fsutil/file_range_set.go b/pkg/sentry/fs/fsutil/file_range_set.go
index b5ac6c71c..0a5466b0a 100644
--- a/pkg/sentry/fs/fsutil/file_range_set.go
+++ b/pkg/sentry/fs/fsutil/file_range_set.go
@@ -19,13 +19,13 @@ import (
"io"
"math"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/pgalloc"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/safemem"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// FileRangeSet maps offsets into a memmap.Mappable to offsets into a
diff --git a/pkg/sentry/fs/fsutil/frame_ref_set.go b/pkg/sentry/fs/fsutil/frame_ref_set.go
index 6565c28c8..dd63db32b 100644
--- a/pkg/sentry/fs/fsutil/frame_ref_set.go
+++ b/pkg/sentry/fs/fsutil/frame_ref_set.go
@@ -17,7 +17,7 @@ package fsutil
import (
"math"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
)
type frameRefSetFunctions struct{}
diff --git a/pkg/sentry/fs/fsutil/host_file_mapper.go b/pkg/sentry/fs/fsutil/host_file_mapper.go
index 2bdfc0db6..e239f12a5 100644
--- a/pkg/sentry/fs/fsutil/host_file_mapper.go
+++ b/pkg/sentry/fs/fsutil/host_file_mapper.go
@@ -19,11 +19,11 @@ import (
"sync"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/safemem"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// HostFileMapper caches mappings of an arbitrary host file descriptor. It is
diff --git a/pkg/sentry/fs/fsutil/host_file_mapper_unsafe.go b/pkg/sentry/fs/fsutil/host_file_mapper_unsafe.go
index 7167be263..ad11a0573 100644
--- a/pkg/sentry/fs/fsutil/host_file_mapper_unsafe.go
+++ b/pkg/sentry/fs/fsutil/host_file_mapper_unsafe.go
@@ -17,7 +17,7 @@ package fsutil
import (
"unsafe"
- "gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
+ "gvisor.dev/gvisor/pkg/sentry/safemem"
)
func (*HostFileMapper) unsafeBlockFromChunkMapping(addr uintptr) safemem.Block {
diff --git a/pkg/sentry/fs/fsutil/host_mappable.go b/pkg/sentry/fs/fsutil/host_mappable.go
index ad0518b8f..d2495cb83 100644
--- a/pkg/sentry/fs/fsutil/host_mappable.go
+++ b/pkg/sentry/fs/fsutil/host_mappable.go
@@ -18,12 +18,12 @@ import (
"math"
"sync"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/safemem"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// HostMappable implements memmap.Mappable and platform.File over a
diff --git a/pkg/sentry/fs/fsutil/inode.go b/pkg/sentry/fs/fsutil/inode.go
index 925887335..4e100a402 100644
--- a/pkg/sentry/fs/fsutil/inode.go
+++ b/pkg/sentry/fs/fsutil/inode.go
@@ -17,13 +17,13 @@ package fsutil
import (
"sync"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// SimpleFileInode is a simple implementation of InodeOperations.
diff --git a/pkg/sentry/fs/fsutil/inode_cached.go b/pkg/sentry/fs/fsutil/inode_cached.go
index 7bee2eb5f..ed62049a9 100644
--- a/pkg/sentry/fs/fsutil/inode_cached.go
+++ b/pkg/sentry/fs/fsutil/inode_cached.go
@@ -19,17 +19,17 @@ import (
"io"
"sync"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/pgalloc"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/safemem"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// Lock order (compare the lock order model in mm/mm.go):
diff --git a/pkg/sentry/fs/fsutil/inode_cached_test.go b/pkg/sentry/fs/fsutil/inode_cached_test.go
index be3d4b6fc..dc19255ed 100644
--- a/pkg/sentry/fs/fsutil/inode_cached_test.go
+++ b/pkg/sentry/fs/fsutil/inode_cached_test.go
@@ -19,14 +19,14 @@ import (
"io"
"testing"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/safemem"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
type noopBackingFile struct{}
@@ -253,11 +253,11 @@ func (noopMappingSpace) Invalidate(ar usermem.AddrRange, opts memmap.InvalidateO
}
func anonInode(ctx context.Context) *fs.Inode {
- return fs.NewInode(&SimpleFileInode{
+ return fs.NewInode(ctx, &SimpleFileInode{
InodeSimpleAttributes: NewInodeSimpleAttributes(ctx, fs.FileOwnerFromContext(ctx), fs.FilePermissions{
User: fs.PermMask{Read: true, Write: true},
}, 0),
- }, fs.NewPseudoMountSource(), fs.StableAttr{
+ }, fs.NewPseudoMountSource(ctx), fs.StableAttr{
Type: fs.Anonymous,
BlockSize: usermem.PageSize,
})
@@ -276,7 +276,7 @@ func TestRead(t *testing.T) {
// Construct a 3-page file.
buf := pagesOf('a', 'b', 'c')
- file := fs.NewFile(ctx, fs.NewDirent(anonInode(ctx), "anon"), fs.FileFlags{}, nil)
+ file := fs.NewFile(ctx, fs.NewDirent(ctx, anonInode(ctx), "anon"), fs.FileFlags{}, nil)
uattr := fs.UnstableAttr{
Size: int64(len(buf)),
}
diff --git a/pkg/sentry/fs/g3doc/inotify.md b/pkg/sentry/fs/g3doc/inotify.md
index 1e99a3357..71a577d9d 100644
--- a/pkg/sentry/fs/g3doc/inotify.md
+++ b/pkg/sentry/fs/g3doc/inotify.md
@@ -9,7 +9,7 @@ For the most part, the sentry implementation of inotify mirrors the Linux
architecture. Inotify instances (i.e. the fd returned by inotify_init(2)) are
backed by a pseudo-filesystem. Events are generated from various places in the
sentry, including the [syscall layer][syscall_dir], the [vfs layer][dirent] and
-the [process fd table][fd_map]. Watches are stored in inodes and generated
+the [process fd table][fd_table]. Watches are stored in inodes and generated
events are queued to the inotify instance owning the watches for delivery to the
user.
@@ -112,11 +112,11 @@ attempts to queue a new event, it is already holding `fs.Watches.mu`. If we used
`Inotify.mu` to also protect the event queue, this would violate the above lock
ordering.
-[dirent]: https://gvisor.googlesource.com/gvisor/+/master/pkg/sentry/fs/dirent.go
-[event]: https://gvisor.googlesource.com/gvisor/+/master/pkg/sentry/fs/inotify_event.go
-[fd_map]: https://gvisor.googlesource.com/gvisor/+/master/pkg/sentry/kernel/fd_map.go
-[inode]: https://gvisor.googlesource.com/gvisor/+/master/pkg/sentry/fs/inode.go
-[inode_watches]: https://gvisor.googlesource.com/gvisor/+/master/pkg/sentry/fs/inode_inotify.go
-[inotify]: https://gvisor.googlesource.com/gvisor/+/master/pkg/sentry/fs/inotify.go
-[syscall_dir]: https://gvisor.googlesource.com/gvisor/+/master/pkg/sentry/syscalls/linux/
-[watch]: https://gvisor.googlesource.com/gvisor/+/master/pkg/sentry/fs/inotify_watch.go
+[dirent]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/fs/dirent.go
+[event]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/fs/inotify_event.go
+[fd_table]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/kernel/fd_table.go
+[inode]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/fs/inode.go
+[inode_watches]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/fs/inode_inotify.go
+[inotify]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/fs/inotify.go
+[syscall_dir]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/syscalls/linux/
+[watch]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/fs/inotify_watch.go
diff --git a/pkg/sentry/fs/gofer/BUILD b/pkg/sentry/fs/gofer/BUILD
index f2c79b475..6b993928c 100644
--- a/pkg/sentry/fs/gofer/BUILD
+++ b/pkg/sentry/fs/gofer/BUILD
@@ -21,7 +21,7 @@ go_library(
"socket.go",
"util.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs/gofer",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/fs/gofer",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
diff --git a/pkg/sentry/fs/gofer/attr.go b/pkg/sentry/fs/gofer/attr.go
index c572f3396..4848e2374 100644
--- a/pkg/sentry/fs/gofer/attr.go
+++ b/pkg/sentry/fs/gofer/attr.go
@@ -17,12 +17,12 @@ package gofer
import (
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/p9"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/p9"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// getattr returns the 9p attributes of the p9.File. On success, Mode, Size, and RDev
diff --git a/pkg/sentry/fs/gofer/cache_policy.go b/pkg/sentry/fs/gofer/cache_policy.go
index c59344589..cc11c6339 100644
--- a/pkg/sentry/fs/gofer/cache_policy.go
+++ b/pkg/sentry/fs/gofer/cache_policy.go
@@ -17,8 +17,8 @@ package gofer
import (
"fmt"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
)
// cachePolicy is a 9p cache policy. It has methods that determine what to
diff --git a/pkg/sentry/fs/gofer/context_file.go b/pkg/sentry/fs/gofer/context_file.go
index be53ac4d9..44b72582a 100644
--- a/pkg/sentry/fs/gofer/context_file.go
+++ b/pkg/sentry/fs/gofer/context_file.go
@@ -15,9 +15,9 @@
package gofer
import (
- "gvisor.googlesource.com/gvisor/pkg/fd"
- "gvisor.googlesource.com/gvisor/pkg/p9"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/fd"
+ "gvisor.dev/gvisor/pkg/p9"
+ "gvisor.dev/gvisor/pkg/sentry/context"
)
// contextFile is a wrapper around p9.File that notifies the context that
diff --git a/pkg/sentry/fs/gofer/device.go b/pkg/sentry/fs/gofer/device.go
index 1de6c247c..cbd3c5da2 100644
--- a/pkg/sentry/fs/gofer/device.go
+++ b/pkg/sentry/fs/gofer/device.go
@@ -14,7 +14,7 @@
package gofer
-import "gvisor.googlesource.com/gvisor/pkg/sentry/device"
+import "gvisor.dev/gvisor/pkg/sentry/device"
// goferDevice is the gofer virtual device.
var goferDevice = device.NewAnonMultiDevice()
diff --git a/pkg/sentry/fs/gofer/file.go b/pkg/sentry/fs/gofer/file.go
index fb4f50113..9e2e412cd 100644
--- a/pkg/sentry/fs/gofer/file.go
+++ b/pkg/sentry/fs/gofer/file.go
@@ -19,17 +19,17 @@ import (
"syscall"
"time"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/metric"
- "gvisor.googlesource.com/gvisor/pkg/p9"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/device"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/metric"
+ "gvisor.dev/gvisor/pkg/p9"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/device"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/waiter"
)
var (
@@ -137,7 +137,7 @@ func (f *fileOperations) Readdir(ctx context.Context, file *fs.File, serializer
}
// IterateDir implements fs.DirIterator.IterateDir.
-func (f *fileOperations) IterateDir(ctx context.Context, dirCtx *fs.DirCtx, offset int) (int, error) {
+func (f *fileOperations) IterateDir(ctx context.Context, d *fs.Dirent, dirCtx *fs.DirCtx, offset int) (int, error) {
f.inodeOperations.readdirMu.Lock()
defer f.inodeOperations.readdirMu.Unlock()
diff --git a/pkg/sentry/fs/gofer/file_state.go b/pkg/sentry/fs/gofer/file_state.go
index 31264e065..9aa68a70e 100644
--- a/pkg/sentry/fs/gofer/file_state.go
+++ b/pkg/sentry/fs/gofer/file_state.go
@@ -17,8 +17,8 @@ package gofer
import (
"fmt"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
)
// afterLoad is invoked by stateify.
diff --git a/pkg/sentry/fs/gofer/fs.go b/pkg/sentry/fs/gofer/fs.go
index 6ab89fcc2..69999dc28 100644
--- a/pkg/sentry/fs/gofer/fs.go
+++ b/pkg/sentry/fs/gofer/fs.go
@@ -20,9 +20,9 @@ import (
"fmt"
"strconv"
- "gvisor.googlesource.com/gvisor/pkg/p9"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/p9"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
)
// The following are options defined by the Linux 9p client that we support,
diff --git a/pkg/sentry/fs/gofer/gofer_test.go b/pkg/sentry/fs/gofer/gofer_test.go
index 29d34da7e..7fc3c32ae 100644
--- a/pkg/sentry/fs/gofer/gofer_test.go
+++ b/pkg/sentry/fs/gofer/gofer_test.go
@@ -20,11 +20,11 @@ import (
"testing"
"time"
- "gvisor.googlesource.com/gvisor/pkg/p9"
- "gvisor.googlesource.com/gvisor/pkg/p9/p9test"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/p9"
+ "gvisor.dev/gvisor/pkg/p9/p9test"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
)
// rootTest runs a test with a p9 mock and an fs.InodeOperations created from
@@ -62,8 +62,8 @@ func rootTest(t *testing.T, name string, cp cachePolicy, fn func(context.Context
sattr, rootInodeOperations := newInodeOperations(ctx, s, contextFile{
file: rootFile,
}, root.QID, p9.AttrMaskAll(), root.Attr, false /* socket */)
- m := fs.NewMountSource(s, &filesystem{}, fs.MountSourceFlags{})
- rootInode := fs.NewInode(rootInodeOperations, m, sattr)
+ m := fs.NewMountSource(ctx, s, &filesystem{}, fs.MountSourceFlags{})
+ rootInode := fs.NewInode(ctx, rootInodeOperations, m, sattr)
// Ensure that the cache is fully invalidated, so that any
// close actions actually take place before the full harness is
@@ -207,7 +207,7 @@ func TestRevalidation(t *testing.T) {
name := fmt.Sprintf("cachepolicy=%s", test.cachePolicy)
rootTest(t, name, test.cachePolicy, func(ctx context.Context, h *p9test.Harness, rootFile *p9test.Mock, rootInode *fs.Inode) {
// Wrap in a dirent object.
- rootDir := fs.NewDirent(rootInode, "root")
+ rootDir := fs.NewDirent(ctx, rootInode, "root")
// Create a mock file a child of the root. We save when
// this is generated, so that when the time changed, we
diff --git a/pkg/sentry/fs/gofer/handles.go b/pkg/sentry/fs/gofer/handles.go
index c7098cd36..27eeae3d9 100644
--- a/pkg/sentry/fs/gofer/handles.go
+++ b/pkg/sentry/fs/gofer/handles.go
@@ -17,14 +17,14 @@ package gofer
import (
"io"
- "gvisor.googlesource.com/gvisor/pkg/fd"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/p9"
- "gvisor.googlesource.com/gvisor/pkg/refs"
- "gvisor.googlesource.com/gvisor/pkg/secio"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
+ "gvisor.dev/gvisor/pkg/fd"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/p9"
+ "gvisor.dev/gvisor/pkg/refs"
+ "gvisor.dev/gvisor/pkg/secio"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/safemem"
)
// handles are the open handles of a gofer file. They are reference counted to
@@ -79,11 +79,12 @@ func newHandles(ctx context.Context, file contextFile, flags fs.FileFlags) (*han
newFile.close(ctx)
return nil, err
}
- h := &handles{
+ h := handles{
File: newFile,
Host: hostFile,
}
- return h, nil
+ h.EnableLeakCheck("gofer.handles")
+ return &h, nil
}
type handleReadWriter struct {
diff --git a/pkg/sentry/fs/gofer/inode.go b/pkg/sentry/fs/gofer/inode.go
index dcb3b2880..95b064aea 100644
--- a/pkg/sentry/fs/gofer/inode.go
+++ b/pkg/sentry/fs/gofer/inode.go
@@ -19,19 +19,19 @@ import (
"sync"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/fd"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/p9"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/device"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fdpipe"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/host"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/fd"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/p9"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/device"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fdpipe"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/fs/host"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/safemem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// inodeOperations implements fs.InodeOperations.
diff --git a/pkg/sentry/fs/gofer/inode_state.go b/pkg/sentry/fs/gofer/inode_state.go
index ac22ee4b1..0b2eedb7c 100644
--- a/pkg/sentry/fs/gofer/inode_state.go
+++ b/pkg/sentry/fs/gofer/inode_state.go
@@ -20,11 +20,11 @@ import (
"path/filepath"
"strings"
- "gvisor.googlesource.com/gvisor/pkg/p9"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/device"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/p9"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/device"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/time"
)
// Some fs implementations may not support atime, ctime, or mtime in getattr.
diff --git a/pkg/sentry/fs/gofer/path.go b/pkg/sentry/fs/gofer/path.go
index 092f8b586..8c17603f8 100644
--- a/pkg/sentry/fs/gofer/path.go
+++ b/pkg/sentry/fs/gofer/path.go
@@ -18,13 +18,13 @@ import (
"fmt"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/p9"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/device"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/p9"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/device"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// maxFilenameLen is the maximum length of a filename. This is dictated by 9P's
@@ -73,7 +73,7 @@ func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string
sattr, node := newInodeOperations(ctx, i.fileState.s, newFile, qids[0], mask, p9attr, false)
// Construct a positive Dirent.
- return fs.NewDirent(fs.NewInode(node, dir.MountSource, sattr), name), nil
+ return fs.NewDirent(ctx, fs.NewInode(ctx, node, dir.MountSource, sattr), name), nil
}
// Creates a new Inode at name and returns its File based on the session's cache policy.
@@ -141,20 +141,21 @@ func (i *inodeOperations) Create(ctx context.Context, dir *fs.Inode, name string
sattr, iops := newInodeOperations(ctx, i.fileState.s, unopened, qid, mask, p9attr, false)
// Construct the positive Dirent.
- d := fs.NewDirent(fs.NewInode(iops, dir.MountSource, sattr), name)
+ d := fs.NewDirent(ctx, fs.NewInode(ctx, iops, dir.MountSource, sattr), name)
defer d.DecRef()
// Construct the new file, caching the handles if allowed.
- h := &handles{
+ h := handles{
File: newFile,
Host: hostFile,
}
+ h.EnableLeakCheck("gofer.handles")
if iops.fileState.canShareHandles() {
iops.fileState.handlesMu.Lock()
- iops.fileState.setSharedHandlesLocked(flags, h)
+ iops.fileState.setSharedHandlesLocked(flags, &h)
iops.fileState.handlesMu.Unlock()
}
- return NewFile(ctx, d, name, flags, iops, h), nil
+ return NewFile(ctx, d, name, flags, iops, &h), nil
}
// CreateLink uses Create to create a symlink between oldname and newname.
@@ -277,7 +278,7 @@ func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string,
sattr, iops := newInodeOperations(ctx, i.fileState.s, unopened, qid, mask, attr, true)
// Construct the positive Dirent.
- childDir := fs.NewDirent(fs.NewInode(iops, dir.MountSource, sattr), name)
+ childDir := fs.NewDirent(ctx, fs.NewInode(ctx, iops, dir.MountSource, sattr), name)
i.session().endpoints.add(key, childDir, ep)
return childDir, nil
}
diff --git a/pkg/sentry/fs/gofer/session.go b/pkg/sentry/fs/gofer/session.go
index 085a358fe..69d08a627 100644
--- a/pkg/sentry/fs/gofer/session.go
+++ b/pkg/sentry/fs/gofer/session.go
@@ -18,18 +18,18 @@ import (
"fmt"
"sync"
- "gvisor.googlesource.com/gvisor/pkg/p9"
- "gvisor.googlesource.com/gvisor/pkg/refs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/device"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport"
- "gvisor.googlesource.com/gvisor/pkg/unet"
+ "gvisor.dev/gvisor/pkg/p9"
+ "gvisor.dev/gvisor/pkg/refs"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/device"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
+ "gvisor.dev/gvisor/pkg/unet"
)
// DefaultDirentCacheSize is the default dirent cache size for 9P mounts. It can
-// be adjusted independentely from the other dirent caches.
+// be adjusted independently from the other dirent caches.
var DefaultDirentCacheSize uint64 = fs.DefaultDirentCacheSize
// +stateify savable
@@ -145,16 +145,21 @@ func (s *session) Destroy() {
s.client.Close()
}
-// Revalidate implements MountSource.Revalidate.
+// Revalidate implements MountSourceOperations.Revalidate.
func (s *session) Revalidate(ctx context.Context, name string, parent, child *fs.Inode) bool {
return s.cachePolicy.revalidate(ctx, name, parent, child)
}
-// Keep implements MountSource.Keep.
+// Keep implements MountSourceOperations.Keep.
func (s *session) Keep(d *fs.Dirent) bool {
return s.cachePolicy.keep(d)
}
+// CacheReaddir implements MountSourceOperations.CacheReaddir.
+func (s *session) CacheReaddir() bool {
+ return s.cachePolicy.cacheReaddir()
+}
+
// ResetInodeMappings implements fs.MountSourceOperations.ResetInodeMappings.
func (s *session) ResetInodeMappings() {
s.inodeMappings = make(map[uint64]string)
@@ -236,7 +241,7 @@ func Root(ctx context.Context, dev string, filesystem fs.Filesystem, superBlockF
}
// Construct the session.
- s := &session{
+ s := session{
connID: dev,
msize: o.msize,
version: o.version,
@@ -245,13 +250,14 @@ func Root(ctx context.Context, dev string, filesystem fs.Filesystem, superBlockF
superBlockFlags: superBlockFlags,
mounter: mounter,
}
+ s.EnableLeakCheck("gofer.session")
if o.privateunixsocket {
s.endpoints = newEndpointMaps()
}
// Construct the MountSource with the session and superBlockFlags.
- m := fs.NewMountSource(s, filesystem, superBlockFlags)
+ m := fs.NewMountSource(ctx, &s, filesystem, superBlockFlags)
// Given that gofer files can consume host FDs, restrict the number
// of files that can be held by the cache.
@@ -285,8 +291,8 @@ func Root(ctx context.Context, dev string, filesystem fs.Filesystem, superBlockF
return nil, err
}
- sattr, iops := newInodeOperations(ctx, s, s.attach, qid, valid, attr, false)
- return fs.NewInode(iops, m, sattr), nil
+ sattr, iops := newInodeOperations(ctx, &s, s.attach, qid, valid, attr, false)
+ return fs.NewInode(ctx, iops, m, sattr), nil
}
// newEndpointMaps creates a new endpointMaps.
diff --git a/pkg/sentry/fs/gofer/session_state.go b/pkg/sentry/fs/gofer/session_state.go
index 68fbf3417..d045e04ff 100644
--- a/pkg/sentry/fs/gofer/session_state.go
+++ b/pkg/sentry/fs/gofer/session_state.go
@@ -17,10 +17,10 @@ package gofer
import (
"fmt"
- "gvisor.googlesource.com/gvisor/pkg/p9"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/unet"
+ "gvisor.dev/gvisor/pkg/p9"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/unet"
)
// beforeSave is invoked by stateify.
@@ -111,5 +111,4 @@ func (s *session) afterLoad() {
panic("failed to restore endpoint maps: " + err.Error())
}
}
-
}
diff --git a/pkg/sentry/fs/gofer/socket.go b/pkg/sentry/fs/gofer/socket.go
index 7ac0a421f..a45a8f36c 100644
--- a/pkg/sentry/fs/gofer/socket.go
+++ b/pkg/sentry/fs/gofer/socket.go
@@ -15,14 +15,15 @@
package gofer
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/p9"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/host"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport"
- "gvisor.googlesource.com/gvisor/pkg/syserr"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/p9"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/host"
+ "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
+ "gvisor.dev/gvisor/pkg/syserr"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// BoundEndpoint returns a gofer-backed transport.BoundEndpoint.
@@ -75,7 +76,7 @@ func sockTypeToP9(t linux.SockType) (p9.ConnectFlags, bool) {
}
// BidirectionalConnect implements ConnectableEndpoint.BidirectionalConnect.
-func (e *endpoint) BidirectionalConnect(ce transport.ConnectingEndpoint, returnConnect func(transport.Receiver, transport.ConnectedEndpoint)) *syserr.Error {
+func (e *endpoint) BidirectionalConnect(ctx context.Context, ce transport.ConnectingEndpoint, returnConnect func(transport.Receiver, transport.ConnectedEndpoint)) *syserr.Error {
cf, ok := sockTypeToP9(ce.Type())
if !ok {
return syserr.ErrConnectionRefused
@@ -100,7 +101,7 @@ func (e *endpoint) BidirectionalConnect(ce transport.ConnectingEndpoint, returnC
return syserr.ErrConnectionRefused
}
- c, serr := host.NewConnectedEndpoint(hostFile, ce.WaiterQueue(), e.path)
+ c, serr := host.NewConnectedEndpoint(ctx, hostFile, ce.WaiterQueue(), e.path)
if serr != nil {
ce.Unlock()
log.Warningf("Gofer returned invalid host socket for BidirectionalConnect; file %+v flags %+v: %v", e.file, cf, serr)
@@ -116,13 +117,13 @@ func (e *endpoint) BidirectionalConnect(ce transport.ConnectingEndpoint, returnC
// UnidirectionalConnect implements
// transport.BoundEndpoint.UnidirectionalConnect.
-func (e *endpoint) UnidirectionalConnect() (transport.ConnectedEndpoint, *syserr.Error) {
+func (e *endpoint) UnidirectionalConnect(ctx context.Context) (transport.ConnectedEndpoint, *syserr.Error) {
hostFile, err := e.file.Connect(p9.DgramSocket)
if err != nil {
return nil, syserr.ErrConnectionRefused
}
- c, serr := host.NewConnectedEndpoint(hostFile, &waiter.Queue{}, e.path)
+ c, serr := host.NewConnectedEndpoint(ctx, hostFile, &waiter.Queue{}, e.path)
if serr != nil {
log.Warningf("Gofer returned invalid host socket for UnidirectionalConnect; file %+v: %v", e.file, serr)
return nil, serr
diff --git a/pkg/sentry/fs/gofer/util.go b/pkg/sentry/fs/gofer/util.go
index d0e1096ce..848e6812b 100644
--- a/pkg/sentry/fs/gofer/util.go
+++ b/pkg/sentry/fs/gofer/util.go
@@ -17,9 +17,9 @@ package gofer
import (
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/p9"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/p9"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
)
func utimes(ctx context.Context, file contextFile, ts fs.TimeSpec) error {
diff --git a/pkg/sentry/fs/host/BUILD b/pkg/sentry/fs/host/BUILD
index ea2ca11bf..b1080fb1a 100644
--- a/pkg/sentry/fs/host/BUILD
+++ b/pkg/sentry/fs/host/BUILD
@@ -22,7 +22,7 @@ go_library(
"util.go",
"util_unsafe.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs/host",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/fs/host",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
diff --git a/pkg/sentry/fs/host/control.go b/pkg/sentry/fs/host/control.go
index 9ebb9bbb3..5532ff5a0 100644
--- a/pkg/sentry/fs/host/control.go
+++ b/pkg/sentry/fs/host/control.go
@@ -17,10 +17,10 @@ package host
import (
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/control"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/socket/control"
+ "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
)
type scmRights struct {
diff --git a/pkg/sentry/fs/host/descriptor.go b/pkg/sentry/fs/host/descriptor.go
index ffcd57a94..2a4d1b291 100644
--- a/pkg/sentry/fs/host/descriptor.go
+++ b/pkg/sentry/fs/host/descriptor.go
@@ -19,9 +19,9 @@ import (
"path"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/fdnotifier"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/fdnotifier"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// descriptor wraps a host fd.
diff --git a/pkg/sentry/fs/host/descriptor_test.go b/pkg/sentry/fs/host/descriptor_test.go
index ff08e43af..4205981f5 100644
--- a/pkg/sentry/fs/host/descriptor_test.go
+++ b/pkg/sentry/fs/host/descriptor_test.go
@@ -20,8 +20,8 @@ import (
"syscall"
"testing"
- "gvisor.googlesource.com/gvisor/pkg/fdnotifier"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/fdnotifier"
+ "gvisor.dev/gvisor/pkg/waiter"
)
func TestDescriptorRelease(t *testing.T) {
diff --git a/pkg/sentry/fs/host/device.go b/pkg/sentry/fs/host/device.go
index 055024c44..484f0b58b 100644
--- a/pkg/sentry/fs/host/device.go
+++ b/pkg/sentry/fs/host/device.go
@@ -15,7 +15,7 @@
package host
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/device"
+ "gvisor.dev/gvisor/pkg/sentry/device"
)
// hostFileDevice is the host file virtual device.
diff --git a/pkg/sentry/fs/host/file.go b/pkg/sentry/fs/host/file.go
index ad0a3ec85..f6c626f2c 100644
--- a/pkg/sentry/fs/host/file.go
+++ b/pkg/sentry/fs/host/file.go
@@ -18,18 +18,18 @@ import (
"fmt"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/fd"
- "gvisor.googlesource.com/gvisor/pkg/fdnotifier"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/secio"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/fd"
+ "gvisor.dev/gvisor/pkg/fdnotifier"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/secio"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/safemem"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// fileOperations implements fs.FileOperations for a host file descriptor.
@@ -109,7 +109,7 @@ func newFileFromDonatedFD(ctx context.Context, donated int, mounter fs.FileOwner
iops := inode.InodeOperations.(*inodeOperations)
name := fmt.Sprintf("host:[%d]", inode.StableAttr.InodeID)
- dirent := fs.NewDirent(inode, name)
+ dirent := fs.NewDirent(ctx, inode, name)
defer dirent.DecRef()
if isTTY {
@@ -179,7 +179,7 @@ func (f *fileOperations) Readdir(ctx context.Context, file *fs.File, serializer
}
// IterateDir implements fs.DirIterator.IterateDir.
-func (f *fileOperations) IterateDir(ctx context.Context, dirCtx *fs.DirCtx, offset int) (int, error) {
+func (f *fileOperations) IterateDir(ctx context.Context, d *fs.Dirent, dirCtx *fs.DirCtx, offset int) (int, error) {
if f.dirinfo == nil {
f.dirinfo = new(dirInfo)
f.dirinfo.buf = make([]byte, usermem.PageSize)
diff --git a/pkg/sentry/fs/host/fs.go b/pkg/sentry/fs/host/fs.go
index b1b8dc0b6..68d2697c0 100644
--- a/pkg/sentry/fs/host/fs.go
+++ b/pkg/sentry/fs/host/fs.go
@@ -23,9 +23,9 @@ import (
"strconv"
"strings"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
)
// FilesystemName is the name under which Filesystem is registered.
@@ -262,7 +262,7 @@ func childDentAttrs(ctx context.Context, d *fs.Dirent) map[string]fs.DentAttr {
// newMountSource constructs a new host fs.MountSource
// relative to a root path. The root should match the mount point.
func newMountSource(ctx context.Context, root string, mounter fs.FileOwner, filesystem fs.Filesystem, flags fs.MountSourceFlags, dontTranslateOwnership bool) *fs.MountSource {
- return fs.NewMountSource(&superOperations{
+ return fs.NewMountSource(ctx, &superOperations{
root: root,
inodeMappings: make(map[uint64]string),
mounter: mounter,
diff --git a/pkg/sentry/fs/host/fs_test.go b/pkg/sentry/fs/host/fs_test.go
index 16c89ddf1..c6852ee30 100644
--- a/pkg/sentry/fs/host/fs_test.go
+++ b/pkg/sentry/fs/host/fs_test.go
@@ -23,9 +23,9 @@ import (
"sort"
"testing"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
)
// newTestMountNamespace creates a MountNamespace with a ramfs root.
diff --git a/pkg/sentry/fs/host/inode.go b/pkg/sentry/fs/host/inode.go
index 7a230e426..679d8321a 100644
--- a/pkg/sentry/fs/host/inode.go
+++ b/pkg/sentry/fs/host/inode.go
@@ -18,18 +18,18 @@ import (
"sync"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/fd"
- "gvisor.googlesource.com/gvisor/pkg/secio"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/device"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/fd"
+ "gvisor.dev/gvisor/pkg/secio"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/device"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/safemem"
+ "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// inodeOperations implements fs.InodeOperations for an fs.Inodes backed
@@ -205,7 +205,7 @@ func newInode(ctx context.Context, msrc *fs.MountSource, fd int, saveable bool,
}
// Return the fs.Inode.
- return fs.NewInode(iops, msrc, fileState.sattr), nil
+ return fs.NewInode(ctx, iops, msrc, fileState.sattr), nil
}
// Mappable implements fs.InodeOperations.Mappable.
@@ -245,7 +245,7 @@ func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string
}
// Return the fs.Dirent.
- return fs.NewDirent(inode, name), nil
+ return fs.NewDirent(ctx, inode, name), nil
}
// Create implements fs.InodeOperations.Create.
@@ -265,7 +265,7 @@ func (i *inodeOperations) Create(ctx context.Context, dir *fs.Inode, name string
return nil, err
}
- d := fs.NewDirent(inode, name)
+ d := fs.NewDirent(ctx, inode, name)
defer d.DecRef()
return inode.GetFile(ctx, d, flags)
}
diff --git a/pkg/sentry/fs/host/inode_state.go b/pkg/sentry/fs/host/inode_state.go
index 26cc755bc..b267ec305 100644
--- a/pkg/sentry/fs/host/inode_state.go
+++ b/pkg/sentry/fs/host/inode_state.go
@@ -18,9 +18,9 @@ import (
"fmt"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/device"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/device"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
)
// beforeSave is invoked by stateify.
diff --git a/pkg/sentry/fs/host/inode_test.go b/pkg/sentry/fs/host/inode_test.go
index ad1878b5a..2d959f10d 100644
--- a/pkg/sentry/fs/host/inode_test.go
+++ b/pkg/sentry/fs/host/inode_test.go
@@ -21,8 +21,8 @@ import (
"syscall"
"testing"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
)
// TestMultipleReaddir verifies that multiple Readdir calls return the same
@@ -56,7 +56,7 @@ func TestMultipleReaddir(t *testing.T) {
t.Fatalf("Failed to create inode: %v", err)
}
- dirent := fs.NewDirent(n, "readdir")
+ dirent := fs.NewDirent(ctx, n, "readdir")
openFile, err := n.GetFile(ctx, dirent, fs.FileFlags{Read: true})
if err != nil {
t.Fatalf("Failed to get file: %v", err)
@@ -64,12 +64,12 @@ func TestMultipleReaddir(t *testing.T) {
defer openFile.DecRef()
c1 := &fs.DirCtx{DirCursor: new(string)}
- if _, err := openFile.FileOperations.(*fileOperations).IterateDir(ctx, c1, 0); err != nil {
+ if _, err := openFile.FileOperations.(*fileOperations).IterateDir(ctx, dirent, c1, 0); err != nil {
t.Fatalf("First Readdir failed: %v", err)
}
c2 := &fs.DirCtx{DirCursor: new(string)}
- if _, err := openFile.FileOperations.(*fileOperations).IterateDir(ctx, c2, 0); err != nil {
+ if _, err := openFile.FileOperations.(*fileOperations).IterateDir(ctx, dirent, c2, 0); err != nil {
t.Errorf("Second Readdir failed: %v", err)
}
diff --git a/pkg/sentry/fs/host/ioctl_unsafe.go b/pkg/sentry/fs/host/ioctl_unsafe.go
index b5a85c4d9..271582e54 100644
--- a/pkg/sentry/fs/host/ioctl_unsafe.go
+++ b/pkg/sentry/fs/host/ioctl_unsafe.go
@@ -18,7 +18,7 @@ import (
"syscall"
"unsafe"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/abi/linux"
)
func ioctlGetTermios(fd int) (*linux.Termios, error) {
diff --git a/pkg/sentry/fs/host/socket.go b/pkg/sentry/fs/host/socket.go
index 305eea718..44c4ee5f2 100644
--- a/pkg/sentry/fs/host/socket.go
+++ b/pkg/sentry/fs/host/socket.go
@@ -19,22 +19,22 @@ import (
"sync"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/fd"
- "gvisor.googlesource.com/gvisor/pkg/fdnotifier"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/refs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/control"
- unixsocket "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport"
- "gvisor.googlesource.com/gvisor/pkg/sentry/uniqueid"
- "gvisor.googlesource.com/gvisor/pkg/syserr"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/unet"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/fd"
+ "gvisor.dev/gvisor/pkg/fdnotifier"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/refs"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/socket/control"
+ unixsocket "gvisor.dev/gvisor/pkg/sentry/socket/unix"
+ "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
+ "gvisor.dev/gvisor/pkg/sentry/uniqueid"
+ "gvisor.dev/gvisor/pkg/syserr"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/unet"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// maxSendBufferSize is the maximum host send buffer size allowed for endpoint.
@@ -47,12 +47,12 @@ const maxSendBufferSize = 8 << 20
//
// +stateify savable
type ConnectedEndpoint struct {
- queue *waiter.Queue
- path string
-
// ref keeps track of references to a connectedEndpoint.
ref refs.AtomicRefCount
+ queue *waiter.Queue
+ path string
+
// If srfd >= 0, it is the host FD that file was imported from.
srfd int `state:"wait"`
@@ -118,7 +118,7 @@ func (c *ConnectedEndpoint) init() *syserr.Error {
// The caller is responsible for calling Init(). Additionaly, Release needs to
// be called twice because ConnectedEndpoint is both a transport.Receiver and
// transport.ConnectedEndpoint.
-func NewConnectedEndpoint(file *fd.FD, queue *waiter.Queue, path string) (*ConnectedEndpoint, *syserr.Error) {
+func NewConnectedEndpoint(ctx context.Context, file *fd.FD, queue *waiter.Queue, path string) (*ConnectedEndpoint, *syserr.Error) {
e := ConnectedEndpoint{
path: path,
queue: queue,
@@ -133,6 +133,8 @@ func NewConnectedEndpoint(file *fd.FD, queue *waiter.Queue, path string) (*Conne
// AtomicRefCounters start off with a single reference. We need two.
e.ref.IncRef()
+ e.ref.EnableLeakCheck("host.ConnectedEndpoint")
+
return &e, nil
}
@@ -151,7 +153,7 @@ func (c *ConnectedEndpoint) Init() {
func NewSocketWithDirent(ctx context.Context, d *fs.Dirent, f *fd.FD, flags fs.FileFlags) (*fs.File, error) {
f2 := fd.New(f.FD())
var q waiter.Queue
- e, err := NewConnectedEndpoint(f2, &q, "" /* path */)
+ e, err := NewConnectedEndpoint(ctx, f2, &q, "" /* path */)
if err != nil {
f2.Release()
return nil, err.ToError()
@@ -162,7 +164,7 @@ func NewSocketWithDirent(ctx context.Context, d *fs.Dirent, f *fd.FD, flags fs.F
e.Init()
- ep := transport.NewExternal(e.stype, uniqueid.GlobalProviderFromContext(ctx), &q, e, e)
+ ep := transport.NewExternal(ctx, e.stype, uniqueid.GlobalProviderFromContext(ctx), &q, e, e)
return unixsocket.NewWithDirent(ctx, d, ep, e.stype, flags), nil
}
@@ -181,7 +183,7 @@ func newSocket(ctx context.Context, orgfd int, saveable bool) (*fs.File, error)
}
f := fd.New(ownedfd)
var q waiter.Queue
- e, err := NewConnectedEndpoint(f, &q, "" /* path */)
+ e, err := NewConnectedEndpoint(ctx, f, &q, "" /* path */)
if err != nil {
if saveable {
f.Close()
@@ -194,7 +196,7 @@ func newSocket(ctx context.Context, orgfd int, saveable bool) (*fs.File, error)
e.srfd = srfd
e.Init()
- ep := transport.NewExternal(e.stype, uniqueid.GlobalProviderFromContext(ctx), &q, e, e)
+ ep := transport.NewExternal(ctx, e.stype, uniqueid.GlobalProviderFromContext(ctx), &q, e, e)
return unixsocket.New(ctx, ep, e.stype), nil
}
diff --git a/pkg/sentry/fs/host/socket_iovec.go b/pkg/sentry/fs/host/socket_iovec.go
index 5efbb3ae8..05d7c79ad 100644
--- a/pkg/sentry/fs/host/socket_iovec.go
+++ b/pkg/sentry/fs/host/socket_iovec.go
@@ -17,8 +17,8 @@ package host
import (
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// maxIovs is the maximum number of iovecs to pass to the host.
diff --git a/pkg/sentry/fs/host/socket_state.go b/pkg/sentry/fs/host/socket_state.go
index 5676c451a..498018f0a 100644
--- a/pkg/sentry/fs/host/socket_state.go
+++ b/pkg/sentry/fs/host/socket_state.go
@@ -18,7 +18,7 @@ import (
"fmt"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/fd"
+ "gvisor.dev/gvisor/pkg/fd"
)
// beforeSave is invoked by stateify.
diff --git a/pkg/sentry/fs/host/socket_test.go b/pkg/sentry/fs/host/socket_test.go
index bc3ce5627..68b38fd1c 100644
--- a/pkg/sentry/fs/host/socket_test.go
+++ b/pkg/sentry/fs/host/socket_test.go
@@ -19,16 +19,16 @@ import (
"syscall"
"testing"
- "gvisor.googlesource.com/gvisor/pkg/fd"
- "gvisor.googlesource.com/gvisor/pkg/fdnotifier"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserr"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/fd"
+ "gvisor.dev/gvisor/pkg/fdnotifier"
+ "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/socket"
+ "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserr"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/waiter"
)
var (
diff --git a/pkg/sentry/fs/host/tty.go b/pkg/sentry/fs/host/tty.go
index e45b339f5..2526412a4 100644
--- a/pkg/sentry/fs/host/tty.go
+++ b/pkg/sentry/fs/host/tty.go
@@ -17,14 +17,14 @@ package host
import (
"sync"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/unimpl"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/unimpl"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// TTYFileOperations implements fs.FileOperations for a host file descriptor
@@ -114,7 +114,7 @@ func (t *TTYFileOperations) Release() {
}
// Ioctl implements fs.FileOperations.Ioctl.
-func (t *TTYFileOperations) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
+func (t *TTYFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
// Ignore arg[0]. This is the real FD:
fd := t.fileOperations.iops.fileState.FD()
ioctl := args[1].Uint64()
diff --git a/pkg/sentry/fs/host/util.go b/pkg/sentry/fs/host/util.go
index 94ff7708e..bad61a9a1 100644
--- a/pkg/sentry/fs/host/util.go
+++ b/pkg/sentry/fs/host/util.go
@@ -19,13 +19,13 @@ import (
"path"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/device"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/device"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/syserror"
)
func open(parent *inodeOperations, name string) (int, error) {
diff --git a/pkg/sentry/fs/host/util_unsafe.go b/pkg/sentry/fs/host/util_unsafe.go
index b95a57c3f..2b76f1065 100644
--- a/pkg/sentry/fs/host/util_unsafe.go
+++ b/pkg/sentry/fs/host/util_unsafe.go
@@ -18,9 +18,9 @@ import (
"syscall"
"unsafe"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
)
// NulByte is a single NUL byte. It is passed to readlinkat as an empty string.
diff --git a/pkg/sentry/fs/host/wait_test.go b/pkg/sentry/fs/host/wait_test.go
index afcb74724..88d24d693 100644
--- a/pkg/sentry/fs/host/wait_test.go
+++ b/pkg/sentry/fs/host/wait_test.go
@@ -19,9 +19,9 @@ import (
"testing"
"time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/waiter"
)
func TestWait(t *testing.T) {
diff --git a/pkg/sentry/fs/inode.go b/pkg/sentry/fs/inode.go
index 0b54c2e77..f4ddfa406 100644
--- a/pkg/sentry/fs/inode.go
+++ b/pkg/sentry/fs/inode.go
@@ -15,16 +15,18 @@
package fs
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/metric"
- "gvisor.googlesource.com/gvisor/pkg/refs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/lock"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "sync"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/metric"
+ "gvisor.dev/gvisor/pkg/refs"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs/lock"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
+ "gvisor.dev/gvisor/pkg/syserror"
)
var opens = metric.MustCreateNewUint64Metric("/fs/opens", false /* sync */, "Number of file opens.")
@@ -55,6 +57,12 @@ type Inode struct {
// overlay is the overlay entry for this Inode.
overlay *overlayEntry
+
+ // appendMu is used to synchronize write operations into files which
+ // have been opened with O_APPEND. Operations which change a file size
+ // have to take this lock for read. Write operations to files with
+ // O_APPEND have to take this lock for write.
+ appendMu sync.RWMutex `state:"nosave"`
}
// LockCtx is an Inode's lock context and contains different personalities of locks; both
@@ -76,14 +84,16 @@ type LockCtx struct {
// NewInode constructs an Inode from InodeOperations, a MountSource, and stable attributes.
//
// NewInode takes a reference on msrc.
-func NewInode(iops InodeOperations, msrc *MountSource, sattr StableAttr) *Inode {
+func NewInode(ctx context.Context, iops InodeOperations, msrc *MountSource, sattr StableAttr) *Inode {
msrc.IncRef()
- return &Inode{
+ i := Inode{
InodeOperations: iops,
StableAttr: sattr,
Watches: newWatches(),
MountSource: msrc,
}
+ i.EnableLeakCheck("fs.Inode")
+ return &i
}
// DecRef drops a reference on the Inode.
@@ -337,6 +347,8 @@ func (i *Inode) Truncate(ctx context.Context, d *Dirent, size int64) error {
if i.overlay != nil {
return overlayTruncate(ctx, i.overlay, d, size)
}
+ i.appendMu.RLock()
+ defer i.appendMu.RUnlock()
return i.InodeOperations.Truncate(ctx, i, size)
}
@@ -438,3 +450,12 @@ func (i *Inode) CheckCapability(ctx context.Context, cp linux.Capability) bool {
}
return creds.HasCapability(cp)
}
+
+func (i *Inode) lockAppendMu(appendMode bool) func() {
+ if appendMode {
+ i.appendMu.Lock()
+ return i.appendMu.Unlock
+ }
+ i.appendMu.RLock()
+ return i.appendMu.RUnlock
+}
diff --git a/pkg/sentry/fs/inode_operations.go b/pkg/sentry/fs/inode_operations.go
index ea089dfae..5cde9d215 100644
--- a/pkg/sentry/fs/inode_operations.go
+++ b/pkg/sentry/fs/inode_operations.go
@@ -17,10 +17,10 @@ package fs
import (
"errors"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
)
var (
diff --git a/pkg/sentry/fs/inode_overlay.go b/pkg/sentry/fs/inode_overlay.go
index 06506fb20..24b769cfc 100644
--- a/pkg/sentry/fs/inode_overlay.go
+++ b/pkg/sentry/fs/inode_overlay.go
@@ -17,11 +17,11 @@ package fs
import (
"strings"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
+ "gvisor.dev/gvisor/pkg/syserror"
)
func overlayHasWhiteout(parent *Inode, name string) bool {
@@ -111,7 +111,7 @@ func overlayLookup(ctx context.Context, parent *overlayEntry, inode *Inode, name
parent.copyMu.RUnlock()
return nil, false, err
}
- d, err := NewDirent(newOverlayInode(ctx, entry, inode.MountSource), name), nil
+ d, err := NewDirent(ctx, newOverlayInode(ctx, entry, inode.MountSource), name), nil
parent.copyMu.RUnlock()
return d, true, err
}
@@ -201,7 +201,7 @@ func overlayLookup(ctx context.Context, parent *overlayEntry, inode *Inode, name
parent.copyMu.RUnlock()
return nil, false, err
}
- d, err := NewDirent(newOverlayInode(ctx, entry, inode.MountSource), name), nil
+ d, err := NewDirent(ctx, newOverlayInode(ctx, entry, inode.MountSource), name), nil
parent.copyMu.RUnlock()
return d, upperInode != nil, err
}
@@ -217,6 +217,9 @@ func overlayCreate(ctx context.Context, o *overlayEntry, parent *Dirent, name st
return nil, err
}
+ // We've added to the directory so we must drop the cache.
+ o.markDirectoryDirty()
+
// Take another reference on the upper file's inode, which will be
// owned by the overlay entry.
upperFile.Dirent.Inode.IncRef()
@@ -245,7 +248,7 @@ func overlayCreate(ctx context.Context, o *overlayEntry, parent *Dirent, name st
// overlay file.
overlayInode := newOverlayInode(ctx, entry, parent.Inode.MountSource)
// d will own the inode reference.
- overlayDirent := NewDirent(overlayInode, name)
+ overlayDirent := NewDirent(ctx, overlayInode, name)
// The overlay file created below with NewFile will take a reference on
// the overlayDirent, and it should be the only thing holding a
// reference at the time of creation, so we must drop this reference.
@@ -265,7 +268,12 @@ func overlayCreateDirectory(ctx context.Context, o *overlayEntry, parent *Dirent
if err := copyUpLockedForRename(ctx, parent); err != nil {
return err
}
- return o.upper.InodeOperations.CreateDirectory(ctx, o.upper, name, perm)
+ if err := o.upper.InodeOperations.CreateDirectory(ctx, o.upper, name, perm); err != nil {
+ return err
+ }
+ // We've added to the directory so we must drop the cache.
+ o.markDirectoryDirty()
+ return nil
}
func overlayCreateLink(ctx context.Context, o *overlayEntry, parent *Dirent, oldname string, newname string) error {
@@ -273,7 +281,12 @@ func overlayCreateLink(ctx context.Context, o *overlayEntry, parent *Dirent, old
if err := copyUpLockedForRename(ctx, parent); err != nil {
return err
}
- return o.upper.InodeOperations.CreateLink(ctx, o.upper, oldname, newname)
+ if err := o.upper.InodeOperations.CreateLink(ctx, o.upper, oldname, newname); err != nil {
+ return err
+ }
+ // We've added to the directory so we must drop the cache.
+ o.markDirectoryDirty()
+ return nil
}
func overlayCreateHardLink(ctx context.Context, o *overlayEntry, parent *Dirent, target *Dirent, name string) error {
@@ -285,7 +298,12 @@ func overlayCreateHardLink(ctx context.Context, o *overlayEntry, parent *Dirent,
if err := copyUpLockedForRename(ctx, target); err != nil {
return err
}
- return o.upper.InodeOperations.CreateHardLink(ctx, o.upper, target.Inode.overlay.upper, name)
+ if err := o.upper.InodeOperations.CreateHardLink(ctx, o.upper, target.Inode.overlay.upper, name); err != nil {
+ return err
+ }
+ // We've added to the directory so we must drop the cache.
+ o.markDirectoryDirty()
+ return nil
}
func overlayCreateFifo(ctx context.Context, o *overlayEntry, parent *Dirent, name string, perm FilePermissions) error {
@@ -293,7 +311,12 @@ func overlayCreateFifo(ctx context.Context, o *overlayEntry, parent *Dirent, nam
if err := copyUpLockedForRename(ctx, parent); err != nil {
return err
}
- return o.upper.InodeOperations.CreateFifo(ctx, o.upper, name, perm)
+ if err := o.upper.InodeOperations.CreateFifo(ctx, o.upper, name, perm); err != nil {
+ return err
+ }
+ // We've added to the directory so we must drop the cache.
+ o.markDirectoryDirty()
+ return nil
}
func overlayRemove(ctx context.Context, o *overlayEntry, parent *Dirent, child *Dirent) error {
@@ -318,6 +341,8 @@ func overlayRemove(ctx context.Context, o *overlayEntry, parent *Dirent, child *
if child.Inode.overlay.lowerExists {
return overlayCreateWhiteout(o.upper, child.name)
}
+ // We've removed from the directory so we must drop the cache.
+ o.markDirectoryDirty()
return nil
}
@@ -395,6 +420,8 @@ func overlayRename(ctx context.Context, o *overlayEntry, oldParent *Dirent, rena
if renamed.Inode.overlay.lowerExists {
return overlayCreateWhiteout(oldParent.Inode.overlay.upper, oldName)
}
+ // We've changed the directory so we must drop the cache.
+ o.markDirectoryDirty()
return nil
}
@@ -411,6 +438,9 @@ func overlayBind(ctx context.Context, o *overlayEntry, parent *Dirent, name stri
return nil, err
}
+ // We've added to the directory so we must drop the cache.
+ o.markDirectoryDirty()
+
// Grab the inode and drop the dirent, we don't need it.
inode := d.Inode
inode.IncRef()
@@ -422,7 +452,7 @@ func overlayBind(ctx context.Context, o *overlayEntry, parent *Dirent, name stri
inode.DecRef()
return nil, err
}
- return NewDirent(newOverlayInode(ctx, entry, inode.MountSource), name), nil
+ return NewDirent(ctx, newOverlayInode(ctx, entry, inode.MountSource), name), nil
}
func overlayBoundEndpoint(o *overlayEntry, path string) transport.BoundEndpoint {
@@ -648,13 +678,13 @@ func NewTestOverlayDir(ctx context.Context, upper, lower *Inode, revalidate bool
fs := &overlayFilesystem{}
var upperMsrc *MountSource
if revalidate {
- upperMsrc = NewRevalidatingMountSource(fs, MountSourceFlags{})
+ upperMsrc = NewRevalidatingMountSource(ctx, fs, MountSourceFlags{})
} else {
- upperMsrc = NewNonCachingMountSource(fs, MountSourceFlags{})
+ upperMsrc = NewNonCachingMountSource(ctx, fs, MountSourceFlags{})
}
- msrc := NewMountSource(&overlayMountSourceOperations{
+ msrc := NewMountSource(ctx, &overlayMountSourceOperations{
upper: upperMsrc,
- lower: NewNonCachingMountSource(fs, MountSourceFlags{}),
+ lower: NewNonCachingMountSource(ctx, fs, MountSourceFlags{}),
}, fs, MountSourceFlags{})
overlay := &overlayEntry{
upper: upper,
diff --git a/pkg/sentry/fs/inode_overlay_test.go b/pkg/sentry/fs/inode_overlay_test.go
index 52ce1d29e..8935aad65 100644
--- a/pkg/sentry/fs/inode_overlay_test.go
+++ b/pkg/sentry/fs/inode_overlay_test.go
@@ -17,12 +17,12 @@ package fs_test
import (
"testing"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/contexttest"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/fs/ramfs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest"
+ "gvisor.dev/gvisor/pkg/syserror"
)
func TestLookup(t *testing.T) {
@@ -275,12 +275,12 @@ func TestLookupRevalidation(t *testing.T) {
},
} {
t.Run(tc.desc, func(t *testing.T) {
- root := fs.NewDirent(newTestRamfsDir(ctx, nil, nil), "root")
+ root := fs.NewDirent(ctx, newTestRamfsDir(ctx, nil, nil), "root")
ctx = &rootContext{
Context: ctx,
root: root,
}
- overlay := fs.NewDirent(fs.NewTestOverlayDir(ctx, tc.upper, tc.lower, tc.revalidate), "overlay")
+ overlay := fs.NewDirent(ctx, fs.NewTestOverlayDir(ctx, tc.upper, tc.lower, tc.revalidate), "overlay")
// Lookup the file twice through the overlay.
first, err := overlay.Walk(ctx, root, fileName)
if err != nil {
@@ -442,7 +442,7 @@ func (f *dirFile) Readdir(ctx context.Context, file *fs.File, ser fs.DentrySeria
}
func newTestRamfsInode(ctx context.Context, msrc *fs.MountSource) *fs.Inode {
- inode := fs.NewInode(&inode{
+ inode := fs.NewInode(ctx, &inode{
InodeStaticFileGetter: fsutil.InodeStaticFileGetter{
Contents: []byte("foobar"),
},
@@ -451,7 +451,7 @@ func newTestRamfsInode(ctx context.Context, msrc *fs.MountSource) *fs.Inode {
}
func newTestRamfsDir(ctx context.Context, contains []dirContent, negative []string) *fs.Inode {
- msrc := fs.NewPseudoMountSource()
+ msrc := fs.NewPseudoMountSource(ctx)
contents := make(map[string]*fs.Inode)
for _, c := range contains {
if c.dir {
@@ -463,7 +463,7 @@ func newTestRamfsDir(ctx context.Context, contains []dirContent, negative []stri
dops := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermissions{
User: fs.PermMask{Read: true, Execute: true},
})
- return fs.NewInode(&dir{
+ return fs.NewInode(ctx, &dir{
InodeOperations: dops,
negative: negative,
}, msrc, fs.StableAttr{Type: fs.Directory})
diff --git a/pkg/sentry/fs/inotify.go b/pkg/sentry/fs/inotify.go
index 7dfd31020..c7f4e2d13 100644
--- a/pkg/sentry/fs/inotify.go
+++ b/pkg/sentry/fs/inotify.go
@@ -18,14 +18,14 @@ import (
"sync"
"sync/atomic"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/uniqueid"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/uniqueid"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// Inotify represents an inotify instance created by inotify_init(2) or
@@ -202,7 +202,7 @@ func (i *Inotify) UnstableAttr(ctx context.Context, file *File) (UnstableAttr, e
}
// Ioctl implements fs.FileOperations.Ioctl.
-func (i *Inotify) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
+func (i *Inotify) Ioctl(ctx context.Context, _ *File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
switch args[1].Int() {
case linux.FIONREAD:
i.evMu.Lock()
diff --git a/pkg/sentry/fs/inotify_event.go b/pkg/sentry/fs/inotify_event.go
index d52f956e4..9f70a3e82 100644
--- a/pkg/sentry/fs/inotify_event.go
+++ b/pkg/sentry/fs/inotify_event.go
@@ -18,8 +18,8 @@ import (
"bytes"
"fmt"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// inotifyEventBaseSize is the base size of linux's struct inotify_event. This
diff --git a/pkg/sentry/fs/inotify_watch.go b/pkg/sentry/fs/inotify_watch.go
index a0b488467..0aa0a5e9b 100644
--- a/pkg/sentry/fs/inotify_watch.go
+++ b/pkg/sentry/fs/inotify_watch.go
@@ -18,7 +18,7 @@ import (
"sync"
"sync/atomic"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/abi/linux"
)
// Watch represent a particular inotify watch created by inotify_add_watch.
diff --git a/pkg/sentry/fs/lock/BUILD b/pkg/sentry/fs/lock/BUILD
index 7164744b8..08d7c0c57 100644
--- a/pkg/sentry/fs/lock/BUILD
+++ b/pkg/sentry/fs/lock/BUILD
@@ -39,7 +39,7 @@ go_library(
"lock_set.go",
"lock_set_functions.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs/lock",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/fs/lock",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/log",
diff --git a/pkg/sentry/fs/lock/lock.go b/pkg/sentry/fs/lock/lock.go
index f2aee4512..636484424 100644
--- a/pkg/sentry/fs/lock/lock.go
+++ b/pkg/sentry/fs/lock/lock.go
@@ -55,7 +55,7 @@ import (
"sync"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// LockType is a type of regional file lock.
@@ -134,7 +134,7 @@ const (
// LockRegion attempts to acquire a typed lock for the uid on a region
// of a file. Returns true if successful in locking the region. If false
// is returned, the caller should normally interpret this as "try again later" if
-// accquiring the lock in a non-blocking mode or "interrupted" if in a blocking mode.
+// acquiring the lock in a non-blocking mode or "interrupted" if in a blocking mode.
// Blocker is the interface used to provide blocking behavior, passing a nil Blocker
// will result in non-blocking behavior.
func (l *Locks) LockRegion(uid UniqueID, t LockType, r LockRange, block Blocker) bool {
diff --git a/pkg/sentry/fs/mock.go b/pkg/sentry/fs/mock.go
index ff04e9b22..7a24c6f1b 100644
--- a/pkg/sentry/fs/mock.go
+++ b/pkg/sentry/fs/mock.go
@@ -15,8 +15,8 @@
package fs
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// MockInodeOperations implements InodeOperations for testing Inodes.
@@ -34,7 +34,7 @@ type MockInodeOperations struct {
// NewMockInode returns a mock *Inode using MockInodeOperations.
func NewMockInode(ctx context.Context, msrc *MountSource, sattr StableAttr) *Inode {
- return NewInode(NewMockInodeOperations(ctx), msrc, sattr)
+ return NewInode(ctx, NewMockInodeOperations(ctx), msrc, sattr)
}
// NewMockInodeOperations returns a *MockInodeOperations.
@@ -75,6 +75,12 @@ func (n *MockMountSourceOps) Keep(dirent *Dirent) bool {
return n.keep
}
+// CacheReaddir implements fs.MountSourceOperations.CacheReaddir.
+func (n *MockMountSourceOps) CacheReaddir() bool {
+ // Common case: cache readdir results if there is a dirent cache.
+ return n.keep
+}
+
// WriteOut implements fs.InodeOperations.WriteOut.
func (n *MockInodeOperations) WriteOut(context.Context, *Inode) error {
return nil
@@ -93,7 +99,7 @@ func (n *MockInodeOperations) IsVirtual() bool {
// Lookup implements fs.InodeOperations.Lookup.
func (n *MockInodeOperations) Lookup(ctx context.Context, dir *Inode, p string) (*Dirent, error) {
n.walkCalled = true
- return NewDirent(NewInode(&MockInodeOperations{}, dir.MountSource, StableAttr{}), p), nil
+ return NewDirent(ctx, NewInode(ctx, &MockInodeOperations{}, dir.MountSource, StableAttr{}), p), nil
}
// SetPermissions implements fs.InodeOperations.SetPermissions.
@@ -114,7 +120,7 @@ func (n *MockInodeOperations) SetTimestamps(context.Context, *Inode, TimeSpec) e
// Create implements fs.InodeOperations.Create.
func (n *MockInodeOperations) Create(ctx context.Context, dir *Inode, p string, flags FileFlags, perms FilePermissions) (*File, error) {
n.createCalled = true
- d := NewDirent(NewInode(&MockInodeOperations{}, dir.MountSource, StableAttr{}), p)
+ d := NewDirent(ctx, NewInode(ctx, &MockInodeOperations{}, dir.MountSource, StableAttr{}), p)
return &File{Dirent: d}, nil
}
diff --git a/pkg/sentry/fs/mount.go b/pkg/sentry/fs/mount.go
index 41e0d285b..7a9692800 100644
--- a/pkg/sentry/fs/mount.go
+++ b/pkg/sentry/fs/mount.go
@@ -19,12 +19,12 @@ import (
"fmt"
"sync/atomic"
- "gvisor.googlesource.com/gvisor/pkg/refs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/refs"
+ "gvisor.dev/gvisor/pkg/sentry/context"
)
-// DirentOperations provide file systems greater control over how long a Dirent stays pinned
-// in core. Implementations must not take Dirent.mu.
+// DirentOperations provide file systems greater control over how long a Dirent
+// stays pinned in core. Implementations must not take Dirent.mu.
type DirentOperations interface {
// Revalidate is called during lookup each time we encounter a Dirent
// in the cache. Implementations may update stale properties of the
@@ -37,6 +37,12 @@ type DirentOperations interface {
// Keep returns true if the Dirent should be kept in memory for as long
// as possible beyond any active references.
Keep(dirent *Dirent) bool
+
+ // CacheReaddir returns true if directory entries returned by
+ // FileOperations.Readdir may be cached for future use.
+ //
+ // Postconditions: This method must always return the same value.
+ CacheReaddir() bool
}
// MountSourceOperations contains filesystem specific operations.
@@ -127,17 +133,19 @@ const DefaultDirentCacheSize uint64 = 1000
// NewMountSource returns a new MountSource. Filesystem may be nil if there is no
// filesystem backing the mount.
-func NewMountSource(mops MountSourceOperations, filesystem Filesystem, flags MountSourceFlags) *MountSource {
+func NewMountSource(ctx context.Context, mops MountSourceOperations, filesystem Filesystem, flags MountSourceFlags) *MountSource {
fsType := "none"
if filesystem != nil {
fsType = filesystem.Name()
}
- return &MountSource{
+ msrc := MountSource{
MountSourceOperations: mops,
Flags: flags,
FilesystemType: fsType,
fscache: NewDirentCache(DefaultDirentCacheSize),
}
+ msrc.EnableLeakCheck("fs.MountSource")
+ return &msrc
}
// DirentRefs returns the current mount direntRefs.
@@ -188,36 +196,40 @@ func (msrc *MountSource) SetDirentCacheLimiter(l *DirentCacheLimiter) {
// NewCachingMountSource returns a generic mount that will cache dirents
// aggressively.
-func NewCachingMountSource(filesystem Filesystem, flags MountSourceFlags) *MountSource {
- return NewMountSource(&SimpleMountSourceOperations{
- keep: true,
- revalidate: false,
+func NewCachingMountSource(ctx context.Context, filesystem Filesystem, flags MountSourceFlags) *MountSource {
+ return NewMountSource(ctx, &SimpleMountSourceOperations{
+ keep: true,
+ revalidate: false,
+ cacheReaddir: true,
}, filesystem, flags)
}
// NewNonCachingMountSource returns a generic mount that will never cache dirents.
-func NewNonCachingMountSource(filesystem Filesystem, flags MountSourceFlags) *MountSource {
- return NewMountSource(&SimpleMountSourceOperations{
- keep: false,
- revalidate: false,
+func NewNonCachingMountSource(ctx context.Context, filesystem Filesystem, flags MountSourceFlags) *MountSource {
+ return NewMountSource(ctx, &SimpleMountSourceOperations{
+ keep: false,
+ revalidate: false,
+ cacheReaddir: false,
}, filesystem, flags)
}
// NewRevalidatingMountSource returns a generic mount that will cache dirents,
-// but will revalidate them on each lookup.
-func NewRevalidatingMountSource(filesystem Filesystem, flags MountSourceFlags) *MountSource {
- return NewMountSource(&SimpleMountSourceOperations{
- keep: true,
- revalidate: true,
+// but will revalidate them on each lookup and always perform uncached readdir.
+func NewRevalidatingMountSource(ctx context.Context, filesystem Filesystem, flags MountSourceFlags) *MountSource {
+ return NewMountSource(ctx, &SimpleMountSourceOperations{
+ keep: true,
+ revalidate: true,
+ cacheReaddir: false,
}, filesystem, flags)
}
// NewPseudoMountSource returns a "pseudo" mount source that is not backed by
// an actual filesystem. It is always non-caching.
-func NewPseudoMountSource() *MountSource {
- return NewMountSource(&SimpleMountSourceOperations{
- keep: false,
- revalidate: false,
+func NewPseudoMountSource(ctx context.Context) *MountSource {
+ return NewMountSource(ctx, &SimpleMountSourceOperations{
+ keep: false,
+ revalidate: false,
+ cacheReaddir: false,
}, nil, MountSourceFlags{})
}
@@ -225,8 +237,9 @@ func NewPseudoMountSource() *MountSource {
//
// +stateify savable
type SimpleMountSourceOperations struct {
- keep bool
- revalidate bool
+ keep bool
+ revalidate bool
+ cacheReaddir bool
}
// Revalidate implements MountSourceOperations.Revalidate.
@@ -239,6 +252,11 @@ func (smo *SimpleMountSourceOperations) Keep(*Dirent) bool {
return smo.keep
}
+// CacheReaddir implements MountSourceOperations.CacheReaddir.
+func (smo *SimpleMountSourceOperations) CacheReaddir() bool {
+ return smo.cacheReaddir
+}
+
// ResetInodeMappings implements MountSourceOperations.ResetInodeMappings.
func (*SimpleMountSourceOperations) ResetInodeMappings() {}
diff --git a/pkg/sentry/fs/mount_overlay.go b/pkg/sentry/fs/mount_overlay.go
index 535f812c8..4fcdd6c01 100644
--- a/pkg/sentry/fs/mount_overlay.go
+++ b/pkg/sentry/fs/mount_overlay.go
@@ -15,7 +15,7 @@
package fs
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/context"
)
// overlayMountSourceOperations implements MountSourceOperations for an overlay
@@ -28,10 +28,10 @@ type overlayMountSourceOperations struct {
lower *MountSource
}
-func newOverlayMountSource(upper, lower *MountSource, flags MountSourceFlags) *MountSource {
+func newOverlayMountSource(ctx context.Context, upper, lower *MountSource, flags MountSourceFlags) *MountSource {
upper.IncRef()
lower.IncRef()
- msrc := NewMountSource(&overlayMountSourceOperations{
+ msrc := NewMountSource(ctx, &overlayMountSourceOperations{
upper: upper,
lower: lower,
}, &overlayFilesystem{}, flags)
@@ -81,6 +81,17 @@ func (o *overlayMountSourceOperations) Keep(dirent *Dirent) bool {
return o.upper.Keep(dirent)
}
+// CacheReaddir implements MountSourceOperations.CacheReaddir for an overlay by
+// performing the logical AND of the upper and lower filesystems' CacheReaddir
+// methods.
+//
+// N.B. This is fs-global instead of inode-specific because it must always
+// return the same value. If it was inode-specific, we couldn't guarantee that
+// property across copy up.
+func (o *overlayMountSourceOperations) CacheReaddir() bool {
+ return o.lower.CacheReaddir() && o.upper.CacheReaddir()
+}
+
// ResetInodeMappings propagates the call to both upper and lower MountSource.
func (o *overlayMountSourceOperations) ResetInodeMappings() {
o.upper.ResetInodeMappings()
diff --git a/pkg/sentry/fs/mount_test.go b/pkg/sentry/fs/mount_test.go
index 2e2716643..0b84732aa 100644
--- a/pkg/sentry/fs/mount_test.go
+++ b/pkg/sentry/fs/mount_test.go
@@ -18,7 +18,7 @@ import (
"fmt"
"testing"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
)
// cacheReallyContains iterates through the dirent cache to determine whether
diff --git a/pkg/sentry/fs/mounts.go b/pkg/sentry/fs/mounts.go
index a5c52d7ba..693ffc760 100644
--- a/pkg/sentry/fs/mounts.go
+++ b/pkg/sentry/fs/mounts.go
@@ -22,12 +22,12 @@ import (
"sync"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/refs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/refs"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// DefaultTraversalLimit provides a sensible default traversal limit that may
@@ -124,7 +124,16 @@ func (m *Mount) IsUndo() bool {
return false
}
-// MountNamespace defines a collection of mounts.
+// MountNamespace defines a VFS root. It contains collection of Mounts that are
+// mounted inside the Dirent tree rooted at the Root Dirent. It provides
+// methods for traversing the Dirent, and for mounting/unmounting in the tree.
+//
+// Note that this does not correspond to a "mount namespace" in the Linux. It
+// is more like a unique VFS instance.
+//
+// It's possible for different processes to have different MountNamespaces. In
+// this case, the file systems exposed to the processes are completely
+// distinct.
//
// +stateify savable
type MountNamespace struct {
@@ -166,18 +175,20 @@ func NewMountNamespace(ctx context.Context, root *Inode) (*MountNamespace, error
// Set the root dirent and id on the root mount. The reference returned from
// NewDirent will be donated to the MountNamespace constructed below.
- d := NewDirent(root, "/")
+ d := NewDirent(ctx, root, "/")
mnts := map[*Dirent]*Mount{
d: newRootMount(1, d),
}
- return &MountNamespace{
+ mns := MountNamespace{
userns: creds.UserNamespace,
root: d,
mounts: mnts,
mountID: 2,
- }, nil
+ }
+ mns.EnableLeakCheck("fs.MountNamespace")
+ return &mns, nil
}
// UserNamespace returns the user namespace associated with this mount manager.
@@ -652,6 +663,11 @@ func (mns *MountNamespace) ResolveExecutablePath(ctx context.Context, wd, name s
}
defer d.DecRef()
+ // Check that it is a regular file.
+ if !IsRegular(d.Inode.StableAttr) {
+ continue
+ }
+
// Check whether we can read and execute the found file.
if err := d.Inode.CheckPermission(ctx, PermMask{Read: true, Execute: true}); err != nil {
log.Infof("Found executable at %q, but user cannot execute it: %v", binPath, err)
diff --git a/pkg/sentry/fs/mounts_test.go b/pkg/sentry/fs/mounts_test.go
index 56d726dd1..c4c771f2c 100644
--- a/pkg/sentry/fs/mounts_test.go
+++ b/pkg/sentry/fs/mounts_test.go
@@ -17,11 +17,11 @@ package fs_test
import (
"testing"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/fs/ramfs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest"
)
// Creates a new MountNamespace with filesystem:
@@ -30,17 +30,17 @@ import (
// |-bar (file)
func createMountNamespace(ctx context.Context) (*fs.MountNamespace, error) {
perms := fs.FilePermsFromMode(0777)
- m := fs.NewPseudoMountSource()
+ m := fs.NewPseudoMountSource(ctx)
barFile := fsutil.NewSimpleFileInode(ctx, fs.RootOwner, perms, 0)
fooDir := ramfs.NewDir(ctx, map[string]*fs.Inode{
- "bar": fs.NewInode(barFile, m, fs.StableAttr{Type: fs.RegularFile}),
+ "bar": fs.NewInode(ctx, barFile, m, fs.StableAttr{Type: fs.RegularFile}),
}, fs.RootOwner, perms)
rootDir := ramfs.NewDir(ctx, map[string]*fs.Inode{
- "foo": fs.NewInode(fooDir, m, fs.StableAttr{Type: fs.Directory}),
+ "foo": fs.NewInode(ctx, fooDir, m, fs.StableAttr{Type: fs.Directory}),
}, fs.RootOwner, perms)
- return fs.NewMountNamespace(ctx, fs.NewInode(rootDir, m, fs.StableAttr{Type: fs.Directory}))
+ return fs.NewMountNamespace(ctx, fs.NewInode(ctx, rootDir, m, fs.StableAttr{Type: fs.Directory}))
}
func TestFindLink(t *testing.T) {
diff --git a/pkg/sentry/fs/offset.go b/pkg/sentry/fs/offset.go
index 3f68da149..f7d844ce7 100644
--- a/pkg/sentry/fs/offset.go
+++ b/pkg/sentry/fs/offset.go
@@ -17,7 +17,7 @@ package fs
import (
"math"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// OffsetPageEnd returns the file offset rounded up to the nearest
diff --git a/pkg/sentry/fs/overlay.go b/pkg/sentry/fs/overlay.go
index db89a5f70..1d3ff39e0 100644
--- a/pkg/sentry/fs/overlay.go
+++ b/pkg/sentry/fs/overlay.go
@@ -19,11 +19,12 @@ import (
"strings"
"sync"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/third_party/gvsync"
)
// The virtual filesystem implements an overlay configuration. For a high-level
@@ -104,7 +105,7 @@ func NewOverlayRoot(ctx context.Context, upper *Inode, lower *Inode, flags Mount
return nil, fmt.Errorf("cannot nest overlay in upper file of another overlay")
}
- msrc := newOverlayMountSource(upper.MountSource, lower.MountSource, flags)
+ msrc := newOverlayMountSource(ctx, upper.MountSource, lower.MountSource, flags)
overlay, err := newOverlayEntry(ctx, upper, lower, true)
if err != nil {
msrc.DecRef()
@@ -127,7 +128,7 @@ func NewOverlayRootFile(ctx context.Context, upperMS *MountSource, lower *Inode,
if !IsRegular(lower.StableAttr) {
return nil, fmt.Errorf("lower Inode is not a regular file")
}
- msrc := newOverlayMountSource(upperMS, lower.MountSource, flags)
+ msrc := newOverlayMountSource(ctx, upperMS, lower.MountSource, flags)
overlay, err := newOverlayEntry(ctx, nil, lower, true)
if err != nil {
msrc.DecRef()
@@ -140,9 +141,9 @@ func NewOverlayRootFile(ctx context.Context, upperMS *MountSource, lower *Inode,
func newOverlayInode(ctx context.Context, o *overlayEntry, msrc *MountSource) *Inode {
var inode *Inode
if o.upper != nil {
- inode = NewInode(nil, msrc, o.upper.StableAttr)
+ inode = NewInode(ctx, nil, msrc, o.upper.StableAttr)
} else {
- inode = NewInode(nil, msrc, o.lower.StableAttr)
+ inode = NewInode(ctx, nil, msrc, o.lower.StableAttr)
}
inode.overlay = o
return inode
@@ -196,6 +197,12 @@ type overlayEntry struct {
// these locks is sufficient to read upper; holding all three for writing
// is required to mutate it.
upper *Inode
+
+ // dirCacheMu protects dirCache.
+ dirCacheMu gvsync.DowngradableRWMutex `state:"nosave"`
+
+ // dirCache is cache of DentAttrs from upper and lower Inodes.
+ dirCache *SortedDentryMap
}
// newOverlayEntry returns a new overlayEntry.
@@ -258,6 +265,17 @@ func (o *overlayEntry) isMappableLocked() bool {
return o.inodeLocked().Mappable() != nil
}
+// markDirectoryDirty marks any cached data dirty for this directory. This is
+// necessary in order to ensure that this node does not retain stale state
+// throughout its lifetime across multiple open directory handles.
+//
+// Currently this means invalidating any readdir caches.
+func (o *overlayEntry) markDirectoryDirty() {
+ o.dirCacheMu.Lock()
+ o.dirCache = nil
+ o.dirCacheMu.Unlock()
+}
+
// AddMapping implements memmap.Mappable.AddMapping.
func (o *overlayEntry) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) error {
o.mapsMu.Lock()
diff --git a/pkg/sentry/fs/proc/BUILD b/pkg/sentry/fs/proc/BUILD
index 1728fe0b5..70ed854a8 100644
--- a/pkg/sentry/fs/proc/BUILD
+++ b/pkg/sentry/fs/proc/BUILD
@@ -27,10 +27,11 @@ go_library(
"uptime.go",
"version.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/fs/proc",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
+ "//pkg/binary",
"//pkg/log",
"//pkg/sentry/context",
"//pkg/sentry/fs",
@@ -41,7 +42,6 @@ go_library(
"//pkg/sentry/inet",
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/auth",
- "//pkg/sentry/kernel/kdefs",
"//pkg/sentry/kernel/time",
"//pkg/sentry/limits",
"//pkg/sentry/mm",
diff --git a/pkg/sentry/fs/proc/cgroup.go b/pkg/sentry/fs/proc/cgroup.go
index 1019f862a..05e31c55d 100644
--- a/pkg/sentry/fs/proc/cgroup.go
+++ b/pkg/sentry/fs/proc/cgroup.go
@@ -17,8 +17,8 @@ package proc
import (
"fmt"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
)
func newCGroupInode(ctx context.Context, msrc *fs.MountSource, cgroupControllers map[string]string) *fs.Inode {
diff --git a/pkg/sentry/fs/proc/cpuinfo.go b/pkg/sentry/fs/proc/cpuinfo.go
index 15031234e..3edf36780 100644
--- a/pkg/sentry/fs/proc/cpuinfo.go
+++ b/pkg/sentry/fs/proc/cpuinfo.go
@@ -15,9 +15,9 @@
package proc
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
)
func newCPUInfo(ctx context.Context, msrc *fs.MountSource) *fs.Inode {
diff --git a/pkg/sentry/fs/proc/device/BUILD b/pkg/sentry/fs/proc/device/BUILD
index 64b0c5a3a..0394451d4 100644
--- a/pkg/sentry/fs/proc/device/BUILD
+++ b/pkg/sentry/fs/proc/device/BUILD
@@ -5,7 +5,7 @@ package(licenses = ["notice"])
go_library(
name = "device",
srcs = ["device.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/device",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/fs/proc/device",
visibility = ["//pkg/sentry:internal"],
deps = ["//pkg/sentry/device"],
)
diff --git a/pkg/sentry/fs/proc/device/device.go b/pkg/sentry/fs/proc/device/device.go
index 0de466c73..bbe66e796 100644
--- a/pkg/sentry/fs/proc/device/device.go
+++ b/pkg/sentry/fs/proc/device/device.go
@@ -16,7 +16,7 @@
package device
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/device"
+ "gvisor.dev/gvisor/pkg/sentry/device"
)
// ProcDevice is the kernel proc device.
diff --git a/pkg/sentry/fs/proc/exec_args.go b/pkg/sentry/fs/proc/exec_args.go
index cb28f6bc3..1d3a2d426 100644
--- a/pkg/sentry/fs/proc/exec_args.go
+++ b/pkg/sentry/fs/proc/exec_args.go
@@ -19,14 +19,14 @@ import (
"fmt"
"io"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// execArgType enumerates the types of exec arguments that are exposed through
@@ -64,7 +64,7 @@ func newExecArgInode(t *kernel.Task, msrc *fs.MountSource, arg execArgType) *fs.
arg: arg,
t: t,
}
- return newProcInode(f, msrc, fs.SpecialFile, t)
+ return newProcInode(t, f, msrc, fs.SpecialFile, t)
}
// GetFile implements fs.InodeOperations.GetFile.
diff --git a/pkg/sentry/fs/proc/fds.go b/pkg/sentry/fs/proc/fds.go
index 744b31c74..bee421d76 100644
--- a/pkg/sentry/fs/proc/fds.go
+++ b/pkg/sentry/fs/proc/fds.go
@@ -19,14 +19,13 @@ import (
"sort"
"strconv"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/device"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/fs/proc/device"
+ "gvisor.dev/gvisor/pkg/sentry/fs/ramfs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// walkDescriptors finds the descriptor (file-flag pair) for the fd identified
@@ -42,8 +41,8 @@ func walkDescriptors(t *kernel.Task, p string, toInode func(*fs.File, kernel.FDF
var file *fs.File
var fdFlags kernel.FDFlags
t.WithMuLocked(func(t *kernel.Task) {
- if fdm := t.FDMap(); fdm != nil {
- file, fdFlags = fdm.GetDescriptor(kdefs.FD(n))
+ if fdTable := t.FDTable(); fdTable != nil {
+ file, fdFlags = fdTable.Get(int32(n))
}
})
if file == nil {
@@ -56,36 +55,31 @@ func walkDescriptors(t *kernel.Task, p string, toInode func(*fs.File, kernel.FDF
// toDentAttr callback for each to get a DentAttr, which it then emits. This is
// a helper for implementing fs.InodeOperations.Readdir.
func readDescriptors(t *kernel.Task, c *fs.DirCtx, offset int64, toDentAttr func(int) fs.DentAttr) (int64, error) {
- var fds kernel.FDs
+ var fds []int32
t.WithMuLocked(func(t *kernel.Task) {
- if fdm := t.FDMap(); fdm != nil {
- fds = fdm.GetFDs()
+ if fdTable := t.FDTable(); fdTable != nil {
+ fds = fdTable.GetFDs()
}
})
- fdInts := make([]int, 0, len(fds))
- for _, fd := range fds {
- fdInts = append(fdInts, int(fd))
- }
-
- // Find the fd to start at.
- idx := sort.SearchInts(fdInts, int(offset))
- if idx == len(fdInts) {
+ // Find the appropriate starting point.
+ idx := sort.Search(len(fds), func(i int) bool { return fds[i] >= int32(offset) })
+ if idx == len(fds) {
return offset, nil
}
- fdInts = fdInts[idx:]
+ fds = fds[idx:]
- var fd int
- for _, fd = range fdInts {
+ // Serialize all FDs.
+ for _, fd := range fds {
name := strconv.FormatUint(uint64(fd), 10)
- if err := c.DirEmit(name, toDentAttr(fd)); err != nil {
+ if err := c.DirEmit(name, toDentAttr(int(fd))); err != nil {
// Returned offset is the next fd to serialize.
return int64(fd), err
}
}
// We serialized them all. Next offset should be higher than last
// serialized fd.
- return int64(fd + 1), nil
+ return int64(fds[len(fds)-1] + 1), nil
}
// fd implements fs.InodeOperations for a file in /proc/TID/fd/.
@@ -105,7 +99,7 @@ func newFd(t *kernel.Task, f *fs.File, msrc *fs.MountSource) *fs.Inode {
Symlink: *ramfs.NewSymlink(t, fs.RootOwner, ""),
file: f,
}
- return newProcInode(fd, msrc, fs.Symlink, t)
+ return newProcInode(t, fd, msrc, fs.Symlink, t)
}
// GetFile returns the fs.File backing this fd. The dirent and flags
@@ -154,9 +148,9 @@ func (f *fd) Close() error {
type fdDir struct {
ramfs.Dir
- // We hold a reference on the task's fdmap but only keep an indirect
- // task pointer to avoid Dirent loading circularity caused by fdmap's
- // potential back pointers into the dirent tree.
+ // We hold a reference on the task's FDTable but only keep an indirect
+ // task pointer to avoid Dirent loading circularity caused by the
+ // table's back pointers into the dirent tree.
t *kernel.Task
}
@@ -168,7 +162,7 @@ func newFdDir(t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
Dir: *ramfs.NewDir(t, nil, fs.RootOwner, fs.FilePermissions{User: fs.PermMask{Read: true, Execute: true}}),
t: t,
}
- return newProcInode(f, msrc, fs.SpecialDirectory, t)
+ return newProcInode(t, f, msrc, fs.SpecialDirectory, t)
}
// Check implements InodeOperations.Check.
@@ -198,7 +192,7 @@ func (f *fdDir) Lookup(ctx context.Context, dir *fs.Inode, p string) (*fs.Dirent
if err != nil {
return nil, err
}
- return fs.NewDirent(n, p), nil
+ return fs.NewDirent(ctx, n, p), nil
}
// GetFile implements fs.FileOperations.GetFile.
@@ -252,7 +246,7 @@ func newFdInfoDir(t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
Dir: *ramfs.NewDir(t, nil, fs.RootOwner, fs.FilePermsFromMode(0500)),
t: t,
}
- return newProcInode(fdid, msrc, fs.SpecialDirectory, t)
+ return newProcInode(t, fdid, msrc, fs.SpecialDirectory, t)
}
// Lookup loads an fd in /proc/TID/fdinfo into a Dirent.
@@ -272,7 +266,7 @@ func (fdid *fdInfoDir) Lookup(ctx context.Context, dir *fs.Inode, p string) (*fs
if err != nil {
return nil, err
}
- return fs.NewDirent(inode, p), nil
+ return fs.NewDirent(ctx, inode, p), nil
}
// GetFile implements fs.FileOperations.GetFile.
diff --git a/pkg/sentry/fs/proc/filesystems.go b/pkg/sentry/fs/proc/filesystems.go
index 7bb081d0e..e9250c51c 100644
--- a/pkg/sentry/fs/proc/filesystems.go
+++ b/pkg/sentry/fs/proc/filesystems.go
@@ -18,9 +18,9 @@ import (
"bytes"
"fmt"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile"
)
// filesystemsData backs /proc/filesystems.
diff --git a/pkg/sentry/fs/proc/fs.go b/pkg/sentry/fs/proc/fs.go
index d57d6cc5d..f14833805 100644
--- a/pkg/sentry/fs/proc/fs.go
+++ b/pkg/sentry/fs/proc/fs.go
@@ -17,8 +17,8 @@ package proc
import (
"fmt"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
)
// filesystem is a procfs.
@@ -30,7 +30,7 @@ func init() {
fs.RegisterFilesystem(&filesystem{})
}
-// FilesystemName is the name underwhich the filesystem is registered.
+// FilesystemName is the name under which the filesystem is registered.
// Name matches fs/proc/root.c:proc_fs_type.name.
const FilesystemName = "proc"
@@ -77,5 +77,5 @@ func (f *filesystem) Mount(ctx context.Context, device string, flags fs.MountSou
// Construct the procfs root. Since procfs files are all virtual, we
// never want them cached.
- return New(ctx, fs.NewNonCachingMountSource(f, flags), cgroups)
+ return New(ctx, fs.NewNonCachingMountSource(ctx, f, flags), cgroups)
}
diff --git a/pkg/sentry/fs/proc/inode.go b/pkg/sentry/fs/proc/inode.go
index 986bc0a45..0c04f81fa 100644
--- a/pkg/sentry/fs/proc/inode.go
+++ b/pkg/sentry/fs/proc/inode.go
@@ -15,15 +15,15 @@
package proc
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/device"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/sentry/mm"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/fs/proc/device"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/mm"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// taskOwnedInodeOps wraps an fs.InodeOperations and overrides the UnstableAttr
@@ -115,11 +115,11 @@ func newStaticProcInode(ctx context.Context, msrc *fs.MountSource, contents []by
Contents: contents,
},
}
- return newProcInode(iops, msrc, fs.SpecialFile, nil)
+ return newProcInode(ctx, iops, msrc, fs.SpecialFile, nil)
}
// newProcInode creates a new inode from the given inode operations.
-func newProcInode(iops fs.InodeOperations, msrc *fs.MountSource, typ fs.InodeType, t *kernel.Task) *fs.Inode {
+func newProcInode(ctx context.Context, iops fs.InodeOperations, msrc *fs.MountSource, typ fs.InodeType, t *kernel.Task) *fs.Inode {
sattr := fs.StableAttr{
DeviceID: device.ProcDevice.DeviceID(),
InodeID: device.ProcDevice.NextIno(),
@@ -129,5 +129,5 @@ func newProcInode(iops fs.InodeOperations, msrc *fs.MountSource, typ fs.InodeTyp
if t != nil {
iops = &taskOwnedInodeOps{iops, t}
}
- return fs.NewInode(iops, msrc, sattr)
+ return fs.NewInode(ctx, iops, msrc, sattr)
}
diff --git a/pkg/sentry/fs/proc/loadavg.go b/pkg/sentry/fs/proc/loadavg.go
index 2dfe7089a..8602b7426 100644
--- a/pkg/sentry/fs/proc/loadavg.go
+++ b/pkg/sentry/fs/proc/loadavg.go
@@ -18,8 +18,8 @@ import (
"bytes"
"fmt"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile"
)
// loadavgData backs /proc/loadavg.
diff --git a/pkg/sentry/fs/proc/meminfo.go b/pkg/sentry/fs/proc/meminfo.go
index d2b9b92c7..495f3e3ba 100644
--- a/pkg/sentry/fs/proc/meminfo.go
+++ b/pkg/sentry/fs/proc/meminfo.go
@@ -18,11 +18,11 @@ import (
"bytes"
"fmt"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// meminfoData backs /proc/meminfo.
diff --git a/pkg/sentry/fs/proc/mounts.go b/pkg/sentry/fs/proc/mounts.go
index 1f7817947..e33c4a460 100644
--- a/pkg/sentry/fs/proc/mounts.go
+++ b/pkg/sentry/fs/proc/mounts.go
@@ -19,10 +19,10 @@ import (
"fmt"
"sort"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
)
// forEachMountSource runs f for the process root mount and each mount that is a
diff --git a/pkg/sentry/fs/proc/net.go b/pkg/sentry/fs/proc/net.go
index 034950158..37694620c 100644
--- a/pkg/sentry/fs/proc/net.go
+++ b/pkg/sentry/fs/proc/net.go
@@ -19,17 +19,18 @@ import (
"fmt"
"time"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/inet"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile"
+ "gvisor.dev/gvisor/pkg/sentry/fs/ramfs"
+ "gvisor.dev/gvisor/pkg/sentry/inet"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/socket"
+ "gvisor.dev/gvisor/pkg/sentry/socket/unix"
+ "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
)
// newNet creates a new proc net entry.
@@ -55,9 +56,8 @@ func (p *proc) newNetDir(ctx context.Context, k *kernel.Kernel, msrc *fs.MountSo
"psched": newStaticProcInode(ctx, msrc, []byte(fmt.Sprintf("%08x %08x %08x %08x\n", uint64(time.Microsecond/time.Nanosecond), 64, 1000000, uint64(time.Second/time.Nanosecond)))),
"ptype": newStaticProcInode(ctx, msrc, []byte("Type Device Function")),
"route": newStaticProcInode(ctx, msrc, []byte("Iface Destination Gateway Flags RefCnt Use Metric Mask MTU Window IRTT")),
- "tcp": newStaticProcInode(ctx, msrc, []byte(" sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode")),
-
- "udp": newStaticProcInode(ctx, msrc, []byte(" sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode ref pointer drops")),
+ "tcp": seqfile.NewSeqFileInode(ctx, &netTCP{k: k}, msrc),
+ "udp": newStaticProcInode(ctx, msrc, []byte(" sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode ref pointer drops")),
"unix": seqfile.NewSeqFileInode(ctx, &netUnix{k: k}, msrc),
}
@@ -70,7 +70,7 @@ func (p *proc) newNetDir(ctx context.Context, k *kernel.Kernel, msrc *fs.MountSo
}
}
d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555))
- return newProcInode(d, msrc, fs.SpecialDirectory, nil)
+ return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil)
}
// ifinet6 implements seqfile.SeqSource for /proc/net/if_inet6.
@@ -210,10 +210,6 @@ func (n *netUnix) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]s
}
var buf bytes.Buffer
- // Header
- fmt.Fprintf(&buf, "Num RefCount Protocol Flags Type St Inode Path\n")
-
- // Entries
for _, se := range n.k.ListSockets() {
s := se.Sock.Get()
if s == nil {
@@ -222,6 +218,7 @@ func (n *netUnix) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]s
}
sfile := s.(*fs.File)
if family, _, _ := sfile.FileOperations.(socket.Socket).Type(); family != linux.AF_UNIX {
+ s.DecRef()
// Not a unix socket.
continue
}
@@ -281,12 +278,160 @@ func (n *netUnix) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]s
}
fmt.Fprintf(&buf, "\n")
- sfile.DecRef()
+ s.DecRef()
+ }
+
+ data := []seqfile.SeqData{
+ {
+ Buf: []byte("Num RefCount Protocol Flags Type St Inode Path\n"),
+ Handle: n,
+ },
+ {
+ Buf: buf.Bytes(),
+ Handle: n,
+ },
+ }
+ return data, 0
+}
+
+// netTCP implements seqfile.SeqSource for /proc/net/tcp.
+//
+// +stateify savable
+type netTCP struct {
+ k *kernel.Kernel
+}
+
+// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate.
+func (*netTCP) NeedsUpdate(generation int64) bool {
+ return true
+}
+
+// ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData.
+func (n *netTCP) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
+ t := kernel.TaskFromContext(ctx)
+
+ if h != nil {
+ return nil, 0
+ }
+
+ var buf bytes.Buffer
+ for _, se := range n.k.ListSockets() {
+ s := se.Sock.Get()
+ if s == nil {
+ log.Debugf("Couldn't resolve weakref %+v in socket table, racing with destruction?", se.Sock)
+ continue
+ }
+ sfile := s.(*fs.File)
+ sops, ok := sfile.FileOperations.(socket.Socket)
+ if !ok {
+ panic(fmt.Sprintf("Found non-socket file in socket table: %+v", sfile))
+ }
+ if family, stype, _ := sops.Type(); !(family == linux.AF_INET && stype == linux.SOCK_STREAM) {
+ s.DecRef()
+ // Not tcp4 sockets.
+ continue
+ }
+
+ // Linux's documentation for the fields below can be found at
+ // https://www.kernel.org/doc/Documentation/networking/proc_net_tcp.txt.
+ // For Linux's implementation, see net/ipv4/tcp_ipv4.c:get_tcp4_sock().
+ // Note that the header doesn't contain labels for all the fields.
+
+ // Field: sl; entry number.
+ fmt.Fprintf(&buf, "%4d: ", se.ID)
+
+ portBuf := make([]byte, 2)
+
+ // Field: local_adddress.
+ var localAddr linux.SockAddrInet
+ if local, _, err := sops.GetSockName(t); err == nil {
+ localAddr = local.(linux.SockAddrInet)
+ }
+ binary.LittleEndian.PutUint16(portBuf, localAddr.Port)
+ fmt.Fprintf(&buf, "%08X:%04X ",
+ binary.LittleEndian.Uint32(localAddr.Addr[:]),
+ portBuf)
+
+ // Field: rem_address.
+ var remoteAddr linux.SockAddrInet
+ if remote, _, err := sops.GetPeerName(t); err == nil {
+ remoteAddr = remote.(linux.SockAddrInet)
+ }
+ binary.LittleEndian.PutUint16(portBuf, remoteAddr.Port)
+ fmt.Fprintf(&buf, "%08X:%04X ",
+ binary.LittleEndian.Uint32(remoteAddr.Addr[:]),
+ portBuf)
+
+ // Field: state; socket state.
+ fmt.Fprintf(&buf, "%02X ", sops.State())
+
+ // Field: tx_queue, rx_queue; number of packets in the transmit and
+ // receive queue. Unimplemented.
+ fmt.Fprintf(&buf, "%08X:%08X ", 0, 0)
+
+ // Field: tr, tm->when; timer active state and number of jiffies
+ // until timer expires. Unimplemented.
+ fmt.Fprintf(&buf, "%02X:%08X ", 0, 0)
+
+ // Field: retrnsmt; number of unrecovered RTO timeouts.
+ // Unimplemented.
+ fmt.Fprintf(&buf, "%08X ", 0)
+
+ // Field: uid.
+ uattr, err := sfile.Dirent.Inode.UnstableAttr(ctx)
+ if err != nil {
+ log.Warningf("Failed to retrieve unstable attr for socket file: %v", err)
+ fmt.Fprintf(&buf, "%5d ", 0)
+ } else {
+ fmt.Fprintf(&buf, "%5d ", uint32(uattr.Owner.UID.In(t.UserNamespace()).OrOverflow()))
+ }
+
+ // Field: timeout; number of unanswered 0-window probes.
+ // Unimplemented.
+ fmt.Fprintf(&buf, "%8d ", 0)
+
+ // Field: inode.
+ fmt.Fprintf(&buf, "%8d ", sfile.InodeID())
+
+ // Field: refcount. Don't count the ref we obtain while deferencing
+ // the weakref to this socket.
+ fmt.Fprintf(&buf, "%d ", sfile.ReadRefs()-1)
+
+ // Field: Socket struct address. Redacted due to the same reason as
+ // the 'Num' field in /proc/net/unix, see netUnix.ReadSeqFileData.
+ fmt.Fprintf(&buf, "%#016p ", (*socket.Socket)(nil))
+
+ // Field: retransmit timeout. Unimplemented.
+ fmt.Fprintf(&buf, "%d ", 0)
+
+ // Field: predicted tick of soft clock (delayed ACK control data).
+ // Unimplemented.
+ fmt.Fprintf(&buf, "%d ", 0)
+
+ // Field: (ack.quick<<1)|ack.pingpong, Unimplemented.
+ fmt.Fprintf(&buf, "%d ", 0)
+
+ // Field: sending congestion window, Unimplemented.
+ fmt.Fprintf(&buf, "%d ", 0)
+
+ // Field: Slow start size threshold, -1 if threshold >= 0xFFFF.
+ // Unimplemented, report as large threshold.
+ fmt.Fprintf(&buf, "%d", -1)
+
+ fmt.Fprintf(&buf, "\n")
+
+ s.DecRef()
}
- data := []seqfile.SeqData{{
- Buf: buf.Bytes(),
- Handle: (*netUnix)(nil),
- }}
+ data := []seqfile.SeqData{
+ {
+ Buf: []byte(" sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode \n"),
+ Handle: n,
+ },
+ {
+ Buf: buf.Bytes(),
+ Handle: n,
+ },
+ }
return data, 0
}
diff --git a/pkg/sentry/fs/proc/net_test.go b/pkg/sentry/fs/proc/net_test.go
index 9aed5fdca..f18681405 100644
--- a/pkg/sentry/fs/proc/net_test.go
+++ b/pkg/sentry/fs/proc/net_test.go
@@ -18,8 +18,8 @@ import (
"reflect"
"testing"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/inet"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/inet"
)
func newIPv6TestStack() *inet.TestStack {
diff --git a/pkg/sentry/fs/proc/proc.go b/pkg/sentry/fs/proc/proc.go
index 0e15894b4..0ef13f2f5 100644
--- a/pkg/sentry/fs/proc/proc.go
+++ b/pkg/sentry/fs/proc/proc.go
@@ -20,15 +20,15 @@ import (
"sort"
"strconv"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/device"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/rpcinet"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/fs/proc/device"
+ "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile"
+ "gvisor.dev/gvisor/pkg/sentry/fs/ramfs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// proc is a root proc node.
@@ -68,7 +68,7 @@ func New(ctx context.Context, msrc *fs.MountSource, cgroupControllers map[string
"filesystems": seqfile.NewSeqFileInode(ctx, &filesystemsData{}, msrc),
"loadavg": seqfile.NewSeqFileInode(ctx, &loadavgData{}, msrc),
"meminfo": seqfile.NewSeqFileInode(ctx, &meminfoData{k}, msrc),
- "mounts": newProcInode(ramfs.NewSymlink(ctx, fs.RootOwner, "self/mounts"), msrc, fs.Symlink, nil),
+ "mounts": newProcInode(ctx, ramfs.NewSymlink(ctx, fs.RootOwner, "self/mounts"), msrc, fs.Symlink, nil),
"self": newSelf(ctx, pidns, msrc),
"stat": seqfile.NewSeqFileInode(ctx, &statData{k}, msrc),
"thread-self": newThreadSelf(ctx, pidns, msrc),
@@ -94,7 +94,7 @@ func New(ctx context.Context, msrc *fs.MountSource, cgroupControllers map[string
p.AddChild(ctx, "net", p.newNetDir(ctx, k, msrc))
}
- return newProcInode(p, msrc, fs.SpecialDirectory, nil), nil
+ return newProcInode(ctx, p, msrc, fs.SpecialDirectory, nil), nil
}
// self is a magical link.
@@ -112,7 +112,7 @@ func newSelf(ctx context.Context, pidns *kernel.PIDNamespace, msrc *fs.MountSour
Symlink: *ramfs.NewSymlink(ctx, fs.RootOwner, ""),
pidns: pidns,
}
- return newProcInode(s, msrc, fs.Symlink, nil)
+ return newProcInode(ctx, s, msrc, fs.Symlink, nil)
}
// newThreadSelf returns a new "threadSelf" node.
@@ -121,7 +121,7 @@ func newThreadSelf(ctx context.Context, pidns *kernel.PIDNamespace, msrc *fs.Mou
Symlink: *ramfs.NewSymlink(ctx, fs.RootOwner, ""),
pidns: pidns,
}
- return newProcInode(s, msrc, fs.Symlink, nil)
+ return newProcInode(ctx, s, msrc, fs.Symlink, nil)
}
// Readlink implements fs.InodeOperations.Readlink.
@@ -185,7 +185,7 @@ func (p *proc) Lookup(ctx context.Context, dir *fs.Inode, name string) (*fs.Dire
// Wrap it in a taskDir.
td := p.newTaskDir(otherTask, dir.MountSource, true)
- return fs.NewDirent(td, name), nil
+ return fs.NewDirent(ctx, td, name), nil
}
// GetFile implements fs.InodeOperations.
diff --git a/pkg/sentry/fs/proc/rpcinet_proc.go b/pkg/sentry/fs/proc/rpcinet_proc.go
index e36c0bfa6..01ac97530 100644
--- a/pkg/sentry/fs/proc/rpcinet_proc.go
+++ b/pkg/sentry/fs/proc/rpcinet_proc.go
@@ -17,19 +17,19 @@ package proc
import (
"io"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/rpcinet"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/fs/ramfs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/waiter"
)
-// rpcInetInode implments fs.InodeOperations.
+// rpcInetInode implements fs.InodeOperations.
type rpcInetInode struct {
fsutil.SimpleFileInode
@@ -45,7 +45,7 @@ func newRPCInetInode(ctx context.Context, msrc *fs.MountSource, filepath string,
filepath: filepath,
k: kernel.KernelFromContext(ctx),
}
- return newProcInode(f, msrc, fs.SpecialFile, nil)
+ return newProcInode(ctx, f, msrc, fs.SpecialFile, nil)
}
// GetFile implements fs.InodeOperations.GetFile.
@@ -141,7 +141,7 @@ func newRPCInetProcNet(ctx context.Context, msrc *fs.MountSource) *fs.Inode {
}
d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555))
- return newProcInode(d, msrc, fs.SpecialDirectory, nil)
+ return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil)
}
// newRPCInetProcSysNet will build an inode for /proc/sys/net.
@@ -152,7 +152,7 @@ func newRPCInetProcSysNet(ctx context.Context, msrc *fs.MountSource) *fs.Inode {
}
d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555))
- return newProcInode(d, msrc, fs.SpecialDirectory, nil)
+ return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil)
}
// newRPCInetSysNetCore builds the /proc/sys/net/core directory.
@@ -170,7 +170,7 @@ func newRPCInetSysNetCore(ctx context.Context, msrc *fs.MountSource) *fs.Inode {
}
d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555))
- return newProcInode(d, msrc, fs.SpecialDirectory, nil)
+ return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil)
}
// newRPCInetSysNetIPv4Dir builds the /proc/sys/net/ipv4 directory.
@@ -213,5 +213,5 @@ func newRPCInetSysNetIPv4Dir(ctx context.Context, msrc *fs.MountSource) *fs.Inod
}
d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555))
- return newProcInode(d, msrc, fs.SpecialDirectory, nil)
+ return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil)
}
diff --git a/pkg/sentry/fs/proc/seqfile/BUILD b/pkg/sentry/fs/proc/seqfile/BUILD
index 6b44c0075..20c3eefc8 100644
--- a/pkg/sentry/fs/proc/seqfile/BUILD
+++ b/pkg/sentry/fs/proc/seqfile/BUILD
@@ -5,7 +5,7 @@ load("//tools/go_stateify:defs.bzl", "go_library", "go_test")
go_library(
name = "seqfile",
srcs = ["seqfile.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
diff --git a/pkg/sentry/fs/proc/seqfile/seqfile.go b/pkg/sentry/fs/proc/seqfile/seqfile.go
index 8364d86ed..5fe823000 100644
--- a/pkg/sentry/fs/proc/seqfile/seqfile.go
+++ b/pkg/sentry/fs/proc/seqfile/seqfile.go
@@ -12,21 +12,22 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+// Package seqfile provides dynamic ordered files.
package seqfile
import (
"io"
"sync"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/device"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/fs/proc/device"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// SeqHandle is a helper handle to seek in the file.
@@ -133,7 +134,7 @@ func NewSeqFileInode(ctx context.Context, source SeqSource, msrc *fs.MountSource
BlockSize: usermem.PageSize,
Type: fs.SpecialFile,
}
- return fs.NewInode(iops, msrc, sattr)
+ return fs.NewInode(ctx, iops, msrc, sattr)
}
// UnstableAttr returns unstable attributes of the SeqFile.
diff --git a/pkg/sentry/fs/proc/seqfile/seqfile_test.go b/pkg/sentry/fs/proc/seqfile/seqfile_test.go
index c4de565eb..ebfeee835 100644
--- a/pkg/sentry/fs/proc/seqfile/seqfile_test.go
+++ b/pkg/sentry/fs/proc/seqfile/seqfile_test.go
@@ -20,11 +20,11 @@ import (
"io"
"testing"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/ramfs"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
type seqTest struct {
@@ -120,15 +120,15 @@ func TestSeqFile(t *testing.T) {
testSource.Init()
// Create a file that can be R/W.
- m := fs.NewPseudoMountSource()
ctx := contexttest.Context(t)
+ m := fs.NewPseudoMountSource(ctx)
contents := map[string]*fs.Inode{
"foo": NewSeqFileInode(ctx, testSource, m),
}
root := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0777))
// How about opening it?
- inode := fs.NewInode(root, m, fs.StableAttr{Type: fs.Directory})
+ inode := fs.NewInode(ctx, root, m, fs.StableAttr{Type: fs.Directory})
dirent2, err := root.Lookup(ctx, inode, "foo")
if err != nil {
t.Fatalf("failed to walk to foo for n2: %v", err)
@@ -196,15 +196,15 @@ func TestSeqFileFileUpdated(t *testing.T) {
testSource.update = true
// Create a file that can be R/W.
- m := fs.NewPseudoMountSource()
ctx := contexttest.Context(t)
+ m := fs.NewPseudoMountSource(ctx)
contents := map[string]*fs.Inode{
"foo": NewSeqFileInode(ctx, testSource, m),
}
root := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0777))
// How about opening it?
- inode := fs.NewInode(root, m, fs.StableAttr{Type: fs.Directory})
+ inode := fs.NewInode(ctx, root, m, fs.StableAttr{Type: fs.Directory})
dirent2, err := root.Lookup(ctx, inode, "foo")
if err != nil {
t.Fatalf("failed to walk to foo for dirent2: %v", err)
diff --git a/pkg/sentry/fs/proc/stat.go b/pkg/sentry/fs/proc/stat.go
index 397f9ec6b..b641effbb 100644
--- a/pkg/sentry/fs/proc/stat.go
+++ b/pkg/sentry/fs/proc/stat.go
@@ -18,10 +18,10 @@ import (
"bytes"
"fmt"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
)
// statData backs /proc/stat.
diff --git a/pkg/sentry/fs/proc/sys.go b/pkg/sentry/fs/proc/sys.go
index 59846af4f..cd37776c8 100644
--- a/pkg/sentry/fs/proc/sys.go
+++ b/pkg/sentry/fs/proc/sys.go
@@ -19,16 +19,16 @@ import (
"io"
"strconv"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/rpcinet"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile"
+ "gvisor.dev/gvisor/pkg/sentry/fs/ramfs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// mmapMinAddrData backs /proc/sys/vm/mmap_min_addr.
@@ -82,14 +82,14 @@ func (p *proc) newKernelDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode
}
children := map[string]*fs.Inode{
- "hostname": newProcInode(&h, msrc, fs.SpecialFile, nil),
+ "hostname": newProcInode(ctx, &h, msrc, fs.SpecialFile, nil),
"shmall": newStaticProcInode(ctx, msrc, []byte(strconv.FormatUint(linux.SHMALL, 10))),
"shmmax": newStaticProcInode(ctx, msrc, []byte(strconv.FormatUint(linux.SHMMAX, 10))),
"shmmni": newStaticProcInode(ctx, msrc, []byte(strconv.FormatUint(linux.SHMMNI, 10))),
}
d := ramfs.NewDir(ctx, children, fs.RootOwner, fs.FilePermsFromMode(0555))
- return newProcInode(d, msrc, fs.SpecialDirectory, nil)
+ return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil)
}
func (p *proc) newVMDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode {
@@ -98,7 +98,7 @@ func (p *proc) newVMDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode {
"overcommit_memory": seqfile.NewSeqFileInode(ctx, &overcommitMemory{}, msrc),
}
d := ramfs.NewDir(ctx, children, fs.RootOwner, fs.FilePermsFromMode(0555))
- return newProcInode(d, msrc, fs.SpecialDirectory, nil)
+ return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil)
}
func (p *proc) newSysDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode {
@@ -115,7 +115,7 @@ func (p *proc) newSysDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode {
}
d := ramfs.NewDir(ctx, children, fs.RootOwner, fs.FilePermsFromMode(0555))
- return newProcInode(d, msrc, fs.SpecialDirectory, nil)
+ return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil)
}
// hostname is the inode for a file containing the system hostname.
diff --git a/pkg/sentry/fs/proc/sys_net.go b/pkg/sentry/fs/proc/sys_net.go
index dbf1a987c..f3b63dfc2 100644
--- a/pkg/sentry/fs/proc/sys_net.go
+++ b/pkg/sentry/fs/proc/sys_net.go
@@ -19,15 +19,15 @@ import (
"io"
"sync"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/device"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/inet"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/fs/proc/device"
+ "gvisor.dev/gvisor/pkg/sentry/fs/ramfs"
+ "gvisor.dev/gvisor/pkg/sentry/inet"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/waiter"
)
type tcpMemDir int
@@ -74,7 +74,7 @@ func newTCPMemInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack, dir
BlockSize: usermem.PageSize,
Type: fs.SpecialFile,
}
- return fs.NewInode(tm, msrc, sattr)
+ return fs.NewInode(ctx, tm, msrc, sattr)
}
// GetFile implements fs.InodeOperations.GetFile.
@@ -184,7 +184,7 @@ func newTCPSackInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *f
BlockSize: usermem.PageSize,
Type: fs.SpecialFile,
}
- return fs.NewInode(ts, msrc, sattr)
+ return fs.NewInode(ctx, ts, msrc, sattr)
}
// GetFile implements fs.InodeOperations.GetFile.
@@ -277,7 +277,7 @@ func (p *proc) newSysNetCore(ctx context.Context, msrc *fs.MountSource, s inet.S
}
d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555))
- return newProcInode(d, msrc, fs.SpecialDirectory, nil)
+ return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil)
}
func (p *proc) newSysNetIPv4Dir(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *fs.Inode {
@@ -339,7 +339,7 @@ func (p *proc) newSysNetIPv4Dir(ctx context.Context, msrc *fs.MountSource, s ine
}
d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555))
- return newProcInode(d, msrc, fs.SpecialDirectory, nil)
+ return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil)
}
func (p *proc) newSysNetDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode {
@@ -351,5 +351,5 @@ func (p *proc) newSysNetDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode
}
}
d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555))
- return newProcInode(d, msrc, fs.SpecialDirectory, nil)
+ return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil)
}
diff --git a/pkg/sentry/fs/proc/sys_net_test.go b/pkg/sentry/fs/proc/sys_net_test.go
index 78135ba13..6abae7a60 100644
--- a/pkg/sentry/fs/proc/sys_net_test.go
+++ b/pkg/sentry/fs/proc/sys_net_test.go
@@ -17,9 +17,9 @@ package proc
import (
"testing"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/inet"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/inet"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
func TestQuerySendBufferSize(t *testing.T) {
diff --git a/pkg/sentry/fs/proc/task.go b/pkg/sentry/fs/proc/task.go
index 21a965f90..d82d4e998 100644
--- a/pkg/sentry/fs/proc/task.go
+++ b/pkg/sentry/fs/proc/task.go
@@ -21,20 +21,20 @@ import (
"sort"
"strconv"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/device"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/limits"
- "gvisor.googlesource.com/gvisor/pkg/sentry/mm"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/fs/proc/device"
+ "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile"
+ "gvisor.dev/gvisor/pkg/sentry/fs/ramfs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/limits"
+ "gvisor.dev/gvisor/pkg/sentry/mm"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// getTaskMM returns t's MemoryManager. If getTaskMM succeeds, the MemoryManager's
@@ -101,7 +101,7 @@ func (p *proc) newTaskDir(t *kernel.Task, msrc *fs.MountSource, showSubtasks boo
Dir: *ramfs.NewDir(t, contents, fs.RootOwner, fs.FilePermsFromMode(0555)),
t: t,
}
- return newProcInode(d, msrc, fs.SpecialDirectory, t)
+ return newProcInode(t, d, msrc, fs.SpecialDirectory, t)
}
// subtasks represents a /proc/TID/task directory.
@@ -122,7 +122,7 @@ func (p *proc) newSubtasks(t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
t: t,
p: p,
}
- return newProcInode(s, msrc, fs.SpecialDirectory, t)
+ return newProcInode(t, s, msrc, fs.SpecialDirectory, t)
}
// UnstableAttr returns unstable attributes of the subtasks.
@@ -162,6 +162,11 @@ func (f *subtasksFile) Readdir(ctx context.Context, file *fs.File, ser fs.Dentry
// subtask to emit.
offset := file.Offset()
+ tasks := f.t.ThreadGroup().MemberIDs(f.pidns)
+ if len(tasks) == 0 {
+ return offset, syserror.ENOENT
+ }
+
if offset == 0 {
// Serialize "." and "..".
root := fs.RootFromContext(ctx)
@@ -178,7 +183,6 @@ func (f *subtasksFile) Readdir(ctx context.Context, file *fs.File, ser fs.Dentry
}
// Serialize tasks.
- tasks := f.t.ThreadGroup().MemberIDs(f.pidns)
taskInts := make([]int, 0, len(tasks))
for _, tid := range tasks {
taskInts = append(taskInts, int(tid))
@@ -223,7 +227,7 @@ func (s *subtasks) Lookup(ctx context.Context, dir *fs.Inode, p string) (*fs.Dir
}
td := s.p.newTaskDir(task, dir.MountSource, false)
- return fs.NewDirent(td, p), nil
+ return fs.NewDirent(ctx, td, p), nil
}
// exe is an fs.InodeOperations symlink for the /proc/PID/exe file.
@@ -240,7 +244,7 @@ func newExe(t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
Symlink: *ramfs.NewSymlink(t, fs.RootOwner, ""),
t: t,
}
- return newProcInode(exeSymlink, msrc, fs.Symlink, t)
+ return newProcInode(t, exeSymlink, msrc, fs.Symlink, t)
}
func (e *exe) executable() (d *fs.Dirent, err error) {
@@ -308,7 +312,7 @@ func newNamespaceSymlink(t *kernel.Task, msrc *fs.MountSource, name string) *fs.
Symlink: *ramfs.NewSymlink(t, fs.RootOwner, target),
t: t,
}
- return newProcInode(n, msrc, fs.Symlink, t)
+ return newProcInode(t, n, msrc, fs.Symlink, t)
}
// Getlink implements fs.InodeOperations.Getlink.
@@ -319,7 +323,7 @@ func (n *namespaceSymlink) Getlink(ctx context.Context, inode *fs.Inode) (*fs.Di
// Create a new regular file to fake the namespace file.
iops := fsutil.NewNoReadWriteFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0777), linux.PROC_SUPER_MAGIC)
- return fs.NewDirent(newProcInode(iops, inode.MountSource, fs.RegularFile, nil), n.Symlink.Target), nil
+ return fs.NewDirent(ctx, newProcInode(ctx, iops, inode.MountSource, fs.RegularFile, nil), n.Symlink.Target), nil
}
func newNamespaceDir(t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
@@ -329,7 +333,7 @@ func newNamespaceDir(t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
"user": newNamespaceSymlink(t, msrc, "user"),
}
d := ramfs.NewDir(t, contents, fs.RootOwner, fs.FilePermsFromMode(0511))
- return newProcInode(d, msrc, fs.SpecialDirectory, t)
+ return newProcInode(t, d, msrc, fs.SpecialDirectory, t)
}
// mapsData implements seqfile.SeqSource for /proc/[pid]/maps.
@@ -340,7 +344,7 @@ type mapsData struct {
}
func newMaps(t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
- return newProcInode(seqfile.NewSeqFile(t, &mapsData{t}), msrc, fs.SpecialFile, t)
+ return newProcInode(t, seqfile.NewSeqFile(t, &mapsData{t}), msrc, fs.SpecialFile, t)
}
func (md *mapsData) mm() *mm.MemoryManager {
@@ -380,7 +384,7 @@ type smapsData struct {
}
func newSmaps(t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
- return newProcInode(seqfile.NewSeqFile(t, &smapsData{t}), msrc, fs.SpecialFile, t)
+ return newProcInode(t, seqfile.NewSeqFile(t, &smapsData{t}), msrc, fs.SpecialFile, t)
}
func (sd *smapsData) mm() *mm.MemoryManager {
@@ -426,7 +430,7 @@ type taskStatData struct {
}
func newTaskStat(t *kernel.Task, msrc *fs.MountSource, showSubtasks bool, pidns *kernel.PIDNamespace) *fs.Inode {
- return newProcInode(seqfile.NewSeqFile(t, &taskStatData{t, showSubtasks /* tgstats */, pidns}), msrc, fs.SpecialFile, t)
+ return newProcInode(t, seqfile.NewSeqFile(t, &taskStatData{t, showSubtasks /* tgstats */, pidns}), msrc, fs.SpecialFile, t)
}
// NeedsUpdate returns whether the generation is old or not.
@@ -511,7 +515,7 @@ type statmData struct {
}
func newStatm(t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
- return newProcInode(seqfile.NewSeqFile(t, &statmData{t}), msrc, fs.SpecialFile, t)
+ return newProcInode(t, seqfile.NewSeqFile(t, &statmData{t}), msrc, fs.SpecialFile, t)
}
// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate.
@@ -548,7 +552,7 @@ type statusData struct {
}
func newStatus(t *kernel.Task, msrc *fs.MountSource, pidns *kernel.PIDNamespace) *fs.Inode {
- return newProcInode(seqfile.NewSeqFile(t, &statusData{t, pidns}), msrc, fs.SpecialFile, t)
+ return newProcInode(t, seqfile.NewSeqFile(t, &statusData{t, pidns}), msrc, fs.SpecialFile, t)
}
// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate.
@@ -580,8 +584,8 @@ func (s *statusData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) (
var fds int
var vss, rss, data uint64
s.t.WithMuLocked(func(t *kernel.Task) {
- if fdm := t.FDMap(); fdm != nil {
- fds = fdm.Size()
+ if fdTable := t.FDTable(); fdTable != nil {
+ fds = fdTable.Size()
}
if mm := t.MemoryManager(); mm != nil {
vss = mm.VirtualMemorySize()
@@ -615,7 +619,7 @@ type ioData struct {
}
func newIO(t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
- return newProcInode(seqfile.NewSeqFile(t, &ioData{t.ThreadGroup()}), msrc, fs.SpecialFile, t)
+ return newProcInode(t, seqfile.NewSeqFile(t, &ioData{t.ThreadGroup()}), msrc, fs.SpecialFile, t)
}
// NeedsUpdate returns whether the generation is old or not.
@@ -664,7 +668,7 @@ func newComm(t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
SimpleFileInode: *fsutil.NewSimpleFileInode(t, fs.RootOwner, fs.FilePermsFromMode(0444), linux.PROC_SUPER_MAGIC),
t: t,
}
- return newProcInode(c, msrc, fs.SpecialFile, t)
+ return newProcInode(t, c, msrc, fs.SpecialFile, t)
}
// Check implements fs.InodeOperations.Check.
@@ -736,7 +740,7 @@ func newAuxvec(t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
SimpleFileInode: *fsutil.NewSimpleFileInode(t, fs.RootOwner, fs.FilePermsFromMode(0444), linux.PROC_SUPER_MAGIC),
t: t,
}
- return newProcInode(a, msrc, fs.SpecialFile, t)
+ return newProcInode(t, a, msrc, fs.SpecialFile, t)
}
// GetFile implements fs.InodeOperations.GetFile.
diff --git a/pkg/sentry/fs/proc/uid_gid_map.go b/pkg/sentry/fs/proc/uid_gid_map.go
index a14b1b45f..eea37d15c 100644
--- a/pkg/sentry/fs/proc/uid_gid_map.go
+++ b/pkg/sentry/fs/proc/uid_gid_map.go
@@ -19,15 +19,15 @@ import (
"fmt"
"io"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// idMapInodeOperations implements fs.InodeOperations for
@@ -66,7 +66,7 @@ func newGIDMap(t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
}
func newIDMap(t *kernel.Task, msrc *fs.MountSource, gids bool) *fs.Inode {
- return newProcInode(&idMapInodeOperations{
+ return newProcInode(t, &idMapInodeOperations{
InodeSimpleAttributes: fsutil.NewInodeSimpleAttributes(t, fs.RootOwner, fs.FilePermsFromMode(0644), linux.PROC_SUPER_MAGIC),
t: t,
gids: gids,
diff --git a/pkg/sentry/fs/proc/uptime.go b/pkg/sentry/fs/proc/uptime.go
index 35c3851e1..4e903917a 100644
--- a/pkg/sentry/fs/proc/uptime.go
+++ b/pkg/sentry/fs/proc/uptime.go
@@ -18,14 +18,14 @@ import (
"fmt"
"io"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// uptime is a file containing the system uptime.
@@ -44,7 +44,7 @@ func newUptime(ctx context.Context, msrc *fs.MountSource) *fs.Inode {
SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0444), linux.PROC_SUPER_MAGIC),
startTime: ktime.NowFromContext(ctx),
}
- return newProcInode(u, msrc, fs.SpecialFile, nil)
+ return newProcInode(ctx, u, msrc, fs.SpecialFile, nil)
}
// GetFile implements fs.InodeOperations.GetFile.
diff --git a/pkg/sentry/fs/proc/version.go b/pkg/sentry/fs/proc/version.go
index a5479990c..a6d2c3cd3 100644
--- a/pkg/sentry/fs/proc/version.go
+++ b/pkg/sentry/fs/proc/version.go
@@ -17,9 +17,9 @@ package proc
import (
"fmt"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
)
// versionData backs /proc/version.
diff --git a/pkg/sentry/fs/ramfs/BUILD b/pkg/sentry/fs/ramfs/BUILD
index f36e4a5e8..516efcc4c 100644
--- a/pkg/sentry/fs/ramfs/BUILD
+++ b/pkg/sentry/fs/ramfs/BUILD
@@ -10,7 +10,7 @@ go_library(
"symlink.go",
"tree.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/fs/ramfs",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
diff --git a/pkg/sentry/fs/ramfs/dir.go b/pkg/sentry/fs/ramfs/dir.go
index cd6e03d66..f3e984c24 100644
--- a/pkg/sentry/fs/ramfs/dir.go
+++ b/pkg/sentry/fs/ramfs/dir.go
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+// Package ramfs provides the fundamentals for a simple in-memory filesystem.
package ramfs
import (
@@ -19,12 +20,12 @@ import (
"sync"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// CreateOps represents operations to create different file types.
@@ -269,7 +270,7 @@ func (d *Dir) Lookup(ctx context.Context, _ *fs.Inode, p string) (*fs.Dirent, er
// Take a reference on the inode before returning it. This reference
// is owned by the dirent we are about to create.
inode.IncRef()
- return fs.NewDirent(inode, p), nil
+ return fs.NewDirent(ctx, inode, p), nil
}
// walkLocked must be called with d.mu held.
@@ -321,7 +322,7 @@ func (d *Dir) Create(ctx context.Context, dir *fs.Inode, name string, flags fs.F
inode.IncRef()
// Create the Dirent and corresponding file.
- created := fs.NewDirent(inode, name)
+ created := fs.NewDirent(ctx, inode, name)
defer created.DecRef()
return created.Inode.GetFile(ctx, created, flags)
}
@@ -382,7 +383,7 @@ func (d *Dir) Bind(ctx context.Context, dir *fs.Inode, name string, ep transport
}
// Take another ref on inode which will be donated to the new dirent.
inode.IncRef()
- return fs.NewDirent(inode, name), nil
+ return fs.NewDirent(ctx, inode, name), nil
}
// CreateFifo implements fs.InodeOperations.CreateFifo.
@@ -430,7 +431,7 @@ func (dfo *dirFileOperations) Seek(ctx context.Context, file *fs.File, whence fs
}
// IterateDir implements DirIterator.IterateDir.
-func (dfo *dirFileOperations) IterateDir(ctx context.Context, dirCtx *fs.DirCtx, offset int) (int, error) {
+func (dfo *dirFileOperations) IterateDir(ctx context.Context, d *fs.Dirent, dirCtx *fs.DirCtx, offset int) (int, error) {
dfo.dir.mu.Lock()
defer dfo.dir.mu.Unlock()
diff --git a/pkg/sentry/fs/ramfs/socket.go b/pkg/sentry/fs/ramfs/socket.go
index 7d8bca70e..a24fe2ea2 100644
--- a/pkg/sentry/fs/ramfs/socket.go
+++ b/pkg/sentry/fs/ramfs/socket.go
@@ -15,12 +15,12 @@
package ramfs
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// Socket represents a socket.
diff --git a/pkg/sentry/fs/ramfs/symlink.go b/pkg/sentry/fs/ramfs/symlink.go
index 21c246169..fcfaa29aa 100644
--- a/pkg/sentry/fs/ramfs/symlink.go
+++ b/pkg/sentry/fs/ramfs/symlink.go
@@ -15,11 +15,11 @@
package ramfs
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// Symlink represents a symlink.
diff --git a/pkg/sentry/fs/ramfs/tree.go b/pkg/sentry/fs/ramfs/tree.go
index 8c6b31f70..702cc4a1e 100644
--- a/pkg/sentry/fs/ramfs/tree.go
+++ b/pkg/sentry/fs/ramfs/tree.go
@@ -19,10 +19,10 @@ import (
"path"
"strings"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/anon"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/anon"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// MakeDirectoryTree constructs a ramfs tree of all directories containing
@@ -68,7 +68,7 @@ func makeSubdir(ctx context.Context, msrc *fs.MountSource, root *Dir, subdir str
// emptyDir returns an empty *ramfs.Dir with all permissions granted.
func emptyDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode {
dir := NewDir(ctx, make(map[string]*fs.Inode), fs.RootOwner, fs.FilePermsFromMode(0777))
- return fs.NewInode(dir, msrc, fs.StableAttr{
+ return fs.NewInode(ctx, dir, msrc, fs.StableAttr{
DeviceID: anon.PseudoDevice.DeviceID(),
InodeID: anon.PseudoDevice.NextIno(),
BlockSize: usermem.PageSize,
diff --git a/pkg/sentry/fs/ramfs/tree_test.go b/pkg/sentry/fs/ramfs/tree_test.go
index 27abeb6ba..61a7e2900 100644
--- a/pkg/sentry/fs/ramfs/tree_test.go
+++ b/pkg/sentry/fs/ramfs/tree_test.go
@@ -17,12 +17,11 @@ package ramfs
import (
"testing"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
)
func TestMakeDirectoryTree(t *testing.T) {
- mount := fs.NewPseudoMountSource()
for _, test := range []struct {
name string
@@ -54,6 +53,7 @@ func TestMakeDirectoryTree(t *testing.T) {
},
} {
ctx := contexttest.Context(t)
+ mount := fs.NewPseudoMountSource(ctx)
tree, err := MakeDirectoryTree(ctx, mount, test.subdirs)
if err != nil {
t.Errorf("%s: failed to make ramfs tree, got error %v, want nil", test.name, err)
diff --git a/pkg/sentry/fs/save.go b/pkg/sentry/fs/save.go
index 2eaf6ab69..fe5c76b44 100644
--- a/pkg/sentry/fs/save.go
+++ b/pkg/sentry/fs/save.go
@@ -18,7 +18,7 @@ import (
"fmt"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/log"
)
// SaveInodeMappings saves a mapping of path -> inode ID for every
diff --git a/pkg/sentry/fs/splice.go b/pkg/sentry/fs/splice.go
index 65937f44d..eed1c2854 100644
--- a/pkg/sentry/fs/splice.go
+++ b/pkg/sentry/fs/splice.go
@@ -18,9 +18,9 @@ import (
"io"
"sync/atomic"
- "gvisor.googlesource.com/gvisor/pkg/secio"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/secio"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// Splice moves data to this file, directly from another.
@@ -88,6 +88,8 @@ func Splice(ctx context.Context, dst *File, src *File, opts SpliceOpts) (int64,
// Check append-only mode and the limit.
if !dstPipe {
+ unlock := dst.Dirent.Inode.lockAppendMu(dst.Flags().Append)
+ defer unlock()
if dst.Flags().Append {
if opts.DstOffset {
// We need to acquire the lock.
diff --git a/pkg/sentry/fs/sys/BUILD b/pkg/sentry/fs/sys/BUILD
index 42e98230e..70fa3af89 100644
--- a/pkg/sentry/fs/sys/BUILD
+++ b/pkg/sentry/fs/sys/BUILD
@@ -10,7 +10,7 @@ go_library(
"fs.go",
"sys.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs/sys",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/fs/sys",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
diff --git a/pkg/sentry/fs/sys/device.go b/pkg/sentry/fs/sys/device.go
index 128d3a9d9..4e79dbb71 100644
--- a/pkg/sentry/fs/sys/device.go
+++ b/pkg/sentry/fs/sys/device.go
@@ -14,7 +14,7 @@
package sys
-import "gvisor.googlesource.com/gvisor/pkg/sentry/device"
+import "gvisor.dev/gvisor/pkg/sentry/device"
// sysfsDevice is the sysfs virtual device.
var sysfsDevice = device.NewAnonDevice()
diff --git a/pkg/sentry/fs/sys/devices.go b/pkg/sentry/fs/sys/devices.go
index 54f35c6a0..4f78ca8d2 100644
--- a/pkg/sentry/fs/sys/devices.go
+++ b/pkg/sentry/fs/sys/devices.go
@@ -17,11 +17,11 @@ package sys
import (
"fmt"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
)
// +stateify savable
@@ -58,7 +58,7 @@ func newPossible(ctx context.Context, msrc *fs.MountSource) *fs.Inode {
Contents: contents,
},
}
- return newFile(c, msrc)
+ return newFile(ctx, c, msrc)
}
func newCPU(ctx context.Context, msrc *fs.MountSource) *fs.Inode {
diff --git a/pkg/sentry/fs/sys/fs.go b/pkg/sentry/fs/sys/fs.go
index f0c2322e0..e60b63e75 100644
--- a/pkg/sentry/fs/sys/fs.go
+++ b/pkg/sentry/fs/sys/fs.go
@@ -15,8 +15,8 @@
package sys
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
)
// filesystem is a sysfs.
@@ -30,7 +30,7 @@ func init() {
fs.RegisterFilesystem(&filesystem{})
}
-// FilesystemName is the name underwhich the filesystem is registered.
+// FilesystemName is the name under which the filesystem is registered.
// Name matches fs/sysfs/mount.c:sysfs_fs_type.name.
const FilesystemName = "sysfs"
@@ -61,5 +61,5 @@ func (f *filesystem) Mount(ctx context.Context, device string, flags fs.MountSou
// device is always ignored.
// sysfs ignores data, see fs/sysfs/mount.c:sysfs_mount.
- return New(ctx, fs.NewNonCachingMountSource(f, flags)), nil
+ return New(ctx, fs.NewNonCachingMountSource(ctx, f, flags)), nil
}
diff --git a/pkg/sentry/fs/sys/sys.go b/pkg/sentry/fs/sys/sys.go
index d20ef91fa..b14bf3f55 100644
--- a/pkg/sentry/fs/sys/sys.go
+++ b/pkg/sentry/fs/sys/sys.go
@@ -16,25 +16,25 @@
package sys
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/ramfs"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
-func newFile(node fs.InodeOperations, msrc *fs.MountSource) *fs.Inode {
+func newFile(ctx context.Context, node fs.InodeOperations, msrc *fs.MountSource) *fs.Inode {
sattr := fs.StableAttr{
DeviceID: sysfsDevice.DeviceID(),
InodeID: sysfsDevice.NextIno(),
BlockSize: usermem.PageSize,
Type: fs.SpecialFile,
}
- return fs.NewInode(node, msrc, sattr)
+ return fs.NewInode(ctx, node, msrc, sattr)
}
func newDir(ctx context.Context, msrc *fs.MountSource, contents map[string]*fs.Inode) *fs.Inode {
d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555))
- return fs.NewInode(d, msrc, fs.StableAttr{
+ return fs.NewInode(ctx, d, msrc, fs.StableAttr{
DeviceID: sysfsDevice.DeviceID(),
InodeID: sysfsDevice.NextIno(),
BlockSize: usermem.PageSize,
diff --git a/pkg/sentry/fs/timerfd/BUILD b/pkg/sentry/fs/timerfd/BUILD
index 0e06a5028..1d80daeaf 100644
--- a/pkg/sentry/fs/timerfd/BUILD
+++ b/pkg/sentry/fs/timerfd/BUILD
@@ -5,7 +5,7 @@ load("//tools/go_stateify:defs.bzl", "go_library")
go_library(
name = "timerfd",
srcs = ["timerfd.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs/timerfd",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/fs/timerfd",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/sentry/context",
diff --git a/pkg/sentry/fs/timerfd/timerfd.go b/pkg/sentry/fs/timerfd/timerfd.go
index c1721f434..59403d9db 100644
--- a/pkg/sentry/fs/timerfd/timerfd.go
+++ b/pkg/sentry/fs/timerfd/timerfd.go
@@ -19,14 +19,14 @@ package timerfd
import (
"sync/atomic"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/anon"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/anon"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// TimerOperations implements fs.FileOperations for timerfds.
@@ -53,7 +53,7 @@ type TimerOperations struct {
// NewFile returns a timerfd File that receives time from c.
func NewFile(ctx context.Context, c ktime.Clock) *fs.File {
- dirent := fs.NewDirent(anon.NewInode(ctx), "anon_inode:[timerfd]")
+ dirent := fs.NewDirent(ctx, anon.NewInode(ctx), "anon_inode:[timerfd]")
// Release the initial dirent reference after NewFile takes a reference.
defer dirent.DecRef()
tops := &TimerOperations{}
diff --git a/pkg/sentry/fs/tmpfs/BUILD b/pkg/sentry/fs/tmpfs/BUILD
index 9570c71e5..8f7eb5757 100644
--- a/pkg/sentry/fs/tmpfs/BUILD
+++ b/pkg/sentry/fs/tmpfs/BUILD
@@ -11,7 +11,7 @@ go_library(
"inode_file.go",
"tmpfs.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs/tmpfs",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
diff --git a/pkg/sentry/fs/tmpfs/device.go b/pkg/sentry/fs/tmpfs/device.go
index 179c3a46f..ae7c55ee1 100644
--- a/pkg/sentry/fs/tmpfs/device.go
+++ b/pkg/sentry/fs/tmpfs/device.go
@@ -14,7 +14,7 @@
package tmpfs
-import "gvisor.googlesource.com/gvisor/pkg/sentry/device"
+import "gvisor.dev/gvisor/pkg/sentry/device"
// tmpfsDevice is the kernel tmpfs device.
var tmpfsDevice = device.NewAnonDevice()
diff --git a/pkg/sentry/fs/tmpfs/file_regular.go b/pkg/sentry/fs/tmpfs/file_regular.go
index d1c163879..9a6943fe4 100644
--- a/pkg/sentry/fs/tmpfs/file_regular.go
+++ b/pkg/sentry/fs/tmpfs/file_regular.go
@@ -15,12 +15,12 @@
package tmpfs
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// regularFileOperations implements fs.FileOperations for a regular
diff --git a/pkg/sentry/fs/tmpfs/file_test.go b/pkg/sentry/fs/tmpfs/file_test.go
index b44c06556..0075ef023 100644
--- a/pkg/sentry/fs/tmpfs/file_test.go
+++ b/pkg/sentry/fs/tmpfs/file_test.go
@@ -18,17 +18,17 @@ import (
"bytes"
"testing"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/contexttest"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
func newFileInode(ctx context.Context) *fs.Inode {
- m := fs.NewCachingMountSource(&Filesystem{}, fs.MountSourceFlags{})
+ m := fs.NewCachingMountSource(ctx, &Filesystem{}, fs.MountSourceFlags{})
iops := NewInMemoryFile(ctx, usage.Tmpfs, fs.WithCurrentTime(ctx, fs.UnstableAttr{}))
- return fs.NewInode(iops, m, fs.StableAttr{
+ return fs.NewInode(ctx, iops, m, fs.StableAttr{
DeviceID: tmpfsDevice.DeviceID(),
InodeID: tmpfsDevice.NextIno(),
BlockSize: usermem.PageSize,
@@ -38,7 +38,7 @@ func newFileInode(ctx context.Context) *fs.Inode {
func newFile(ctx context.Context) *fs.File {
inode := newFileInode(ctx)
- f, _ := inode.GetFile(ctx, fs.NewDirent(inode, "stub"), fs.FileFlags{Read: true, Write: true})
+ f, _ := inode.GetFile(ctx, fs.NewDirent(ctx, inode, "stub"), fs.FileFlags{Read: true, Write: true})
return f
}
diff --git a/pkg/sentry/fs/tmpfs/fs.go b/pkg/sentry/fs/tmpfs/fs.go
index b7c29a4d1..be98ad751 100644
--- a/pkg/sentry/fs/tmpfs/fs.go
+++ b/pkg/sentry/fs/tmpfs/fs.go
@@ -18,10 +18,10 @@ import (
"fmt"
"strconv"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
)
const (
@@ -34,6 +34,16 @@ const (
// GID for the root directory.
rootGIDKey = "gid"
+ // cacheKey sets the caching policy for the mount.
+ cacheKey = "cache"
+
+ // cacheAll uses the virtual file system cache for everything (default).
+ cacheAll = "cache"
+
+ // cacheRevalidate allows dirents to be cached, but revalidates them on each
+ // lookup.
+ cacheRevalidate = "revalidate"
+
// TODO(edahlgren/mpratt): support a tmpfs size limit.
// size = "size"
@@ -55,7 +65,7 @@ func init() {
fs.RegisterFilesystem(&Filesystem{})
}
-// FilesystemName is the name underwhich the filesystem is registered.
+// FilesystemName is the name under which the filesystem is registered.
// Name matches mm/shmem.c:shmem_fs_type.name.
const FilesystemName = "tmpfs"
@@ -122,15 +132,27 @@ func (f *Filesystem) Mount(ctx context.Context, device string, flags fs.MountSou
delete(options, rootGIDKey)
}
+ // Construct a mount which will follow the cache options provided.
+ //
+ // TODO(gvisor.dev/issue/179): There should be no reason to disable
+ // caching once bind mounts are properly supported.
+ var msrc *fs.MountSource
+ switch options[cacheKey] {
+ case "", cacheAll:
+ msrc = fs.NewCachingMountSource(ctx, f, flags)
+ case cacheRevalidate:
+ msrc = fs.NewRevalidatingMountSource(ctx, f, flags)
+ default:
+ return nil, fmt.Errorf("invalid cache policy option %q", options[cacheKey])
+ }
+ delete(options, cacheKey)
+
// Fail if the caller passed us more options than we can parse. They may be
// expecting us to set something we can't set.
if len(options) > 0 {
return nil, fmt.Errorf("unsupported mount options: %v", options)
}
- // Construct a mount which will cache dirents.
- msrc := fs.NewCachingMountSource(f, flags)
-
// Construct the tmpfs root.
return NewDir(ctx, nil, owner, perms, msrc), nil
}
diff --git a/pkg/sentry/fs/tmpfs/inode_file.go b/pkg/sentry/fs/tmpfs/inode_file.go
index 3fe659543..f86dfaa36 100644
--- a/pkg/sentry/fs/tmpfs/inode_file.go
+++ b/pkg/sentry/fs/tmpfs/inode_file.go
@@ -20,18 +20,18 @@ import (
"sync"
"time"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/metric"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/metric"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/safemem"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
var (
@@ -128,7 +128,7 @@ func NewMemfdInode(ctx context.Context, allowSeals bool) *fs.Inode {
if allowSeals {
iops.seals = 0
}
- return fs.NewInode(iops, fs.NewNonCachingMountSource(nil, fs.MountSourceFlags{}), fs.StableAttr{
+ return fs.NewInode(ctx, iops, fs.NewNonCachingMountSource(ctx, nil, fs.MountSourceFlags{}), fs.StableAttr{
Type: fs.RegularFile,
DeviceID: tmpfsDevice.DeviceID(),
InodeID: tmpfsDevice.NextIno(),
diff --git a/pkg/sentry/fs/tmpfs/tmpfs.go b/pkg/sentry/fs/tmpfs/tmpfs.go
index 263d10cfe..0f4497cd6 100644
--- a/pkg/sentry/fs/tmpfs/tmpfs.go
+++ b/pkg/sentry/fs/tmpfs/tmpfs.go
@@ -16,17 +16,17 @@
package tmpfs
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/pipe"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/fs/ramfs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/pipe"
+ "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
var fsInfo = fs.Info{
@@ -90,7 +90,7 @@ func NewDir(ctx context.Context, contents map[string]*fs.Inode, owner fs.FileOwn
// Manually set the CreateOps.
d.ramfsDir.CreateOps = d.newCreateOps()
- return fs.NewInode(d, msrc, fs.StableAttr{
+ return fs.NewInode(ctx, d, msrc, fs.StableAttr{
DeviceID: tmpfsDevice.DeviceID(),
InodeID: tmpfsDevice.NextIno(),
BlockSize: usermem.PageSize,
@@ -218,7 +218,7 @@ func (d *Dir) newCreateOps() *ramfs.CreateOps {
Links: 0,
})
iops := NewInMemoryFile(ctx, usage.Tmpfs, uattr)
- return fs.NewInode(iops, dir.MountSource, fs.StableAttr{
+ return fs.NewInode(ctx, iops, dir.MountSource, fs.StableAttr{
DeviceID: tmpfsDevice.DeviceID(),
InodeID: tmpfsDevice.NextIno(),
BlockSize: usermem.PageSize,
@@ -262,7 +262,7 @@ type Symlink struct {
// NewSymlink returns a new symlink with the provided permissions.
func NewSymlink(ctx context.Context, target string, owner fs.FileOwner, msrc *fs.MountSource) *fs.Inode {
s := &Symlink{Symlink: *ramfs.NewSymlink(ctx, owner, target)}
- return fs.NewInode(s, msrc, fs.StableAttr{
+ return fs.NewInode(ctx, s, msrc, fs.StableAttr{
DeviceID: tmpfsDevice.DeviceID(),
InodeID: tmpfsDevice.NextIno(),
BlockSize: usermem.PageSize,
@@ -292,7 +292,7 @@ type Socket struct {
// NewSocket returns a new socket with the provided permissions.
func NewSocket(ctx context.Context, socket transport.BoundEndpoint, owner fs.FileOwner, perms fs.FilePermissions, msrc *fs.MountSource) *fs.Inode {
s := &Socket{Socket: *ramfs.NewSocket(ctx, socket, owner, perms)}
- return fs.NewInode(s, msrc, fs.StableAttr{
+ return fs.NewInode(ctx, s, msrc, fs.StableAttr{
DeviceID: tmpfsDevice.DeviceID(),
InodeID: tmpfsDevice.NextIno(),
BlockSize: usermem.PageSize,
@@ -329,7 +329,7 @@ func NewFifo(ctx context.Context, owner fs.FileOwner, perms fs.FilePermissions,
fifoIops := &Fifo{iops}
// Build a new Inode.
- return fs.NewInode(fifoIops, msrc, fs.StableAttr{
+ return fs.NewInode(ctx, fifoIops, msrc, fs.StableAttr{
DeviceID: tmpfsDevice.DeviceID(),
InodeID: tmpfsDevice.NextIno(),
BlockSize: usermem.PageSize,
diff --git a/pkg/sentry/fs/tty/BUILD b/pkg/sentry/fs/tty/BUILD
index 908d9de09..5e9327aec 100644
--- a/pkg/sentry/fs/tty/BUILD
+++ b/pkg/sentry/fs/tty/BUILD
@@ -13,7 +13,7 @@ go_library(
"slave.go",
"terminal.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/fs/tty",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/fs/tty",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
diff --git a/pkg/sentry/fs/tty/dir.go b/pkg/sentry/fs/tty/dir.go
index 2603354c4..1d128532b 100644
--- a/pkg/sentry/fs/tty/dir.go
+++ b/pkg/sentry/fs/tty/dir.go
@@ -21,15 +21,15 @@ import (
"strconv"
"sync"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// dirInodeOperations is the root of a devpts mount.
@@ -114,7 +114,7 @@ func newDir(ctx context.Context, m *fs.MountSource) *fs.Inode {
InodeID: d.master.StableAttr.InodeID,
})
- return fs.NewInode(d, m, fs.StableAttr{
+ return fs.NewInode(ctx, d, m, fs.StableAttr{
DeviceID: ptsDevice.DeviceID(),
// N.B. Linux always uses inode id 1 for the directory. See
// fs/devpts/inode.c:devpts_fill_super.
@@ -143,7 +143,7 @@ func (d *dirInodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name str
// Master?
if name == "ptmx" {
d.master.IncRef()
- return fs.NewDirent(d.master, name), nil
+ return fs.NewDirent(ctx, d.master, name), nil
}
// Slave number?
@@ -159,7 +159,7 @@ func (d *dirInodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name str
}
s.IncRef()
- return fs.NewDirent(s, name), nil
+ return fs.NewDirent(ctx, s, name), nil
}
// Create implements fs.InodeOperations.Create.
@@ -307,7 +307,7 @@ type dirFileOperations struct {
var _ fs.FileOperations = (*dirFileOperations)(nil)
// IterateDir implements DirIterator.IterateDir.
-func (df *dirFileOperations) IterateDir(ctx context.Context, dirCtx *fs.DirCtx, offset int) (int, error) {
+func (df *dirFileOperations) IterateDir(ctx context.Context, d *fs.Dirent, dirCtx *fs.DirCtx, offset int) (int, error) {
df.di.mu.Lock()
defer df.di.mu.Unlock()
diff --git a/pkg/sentry/fs/tty/fs.go b/pkg/sentry/fs/tty/fs.go
index 701b2f7d9..edee56c12 100644
--- a/pkg/sentry/fs/tty/fs.go
+++ b/pkg/sentry/fs/tty/fs.go
@@ -15,10 +15,10 @@
package tty
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/device"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/device"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// ptsDevice is the pseudo-filesystem device.
@@ -67,7 +67,7 @@ func (f *filesystem) Mount(ctx context.Context, device string, flags fs.MountSou
return nil, syserror.EINVAL
}
- return newDir(ctx, fs.NewMountSource(&superOperations{}, f, flags)), nil
+ return newDir(ctx, fs.NewMountSource(ctx, &superOperations{}, f, flags)), nil
}
// superOperations implements fs.MountSourceOperations, preventing caching.
@@ -94,6 +94,13 @@ func (superOperations) Keep(*fs.Dirent) bool {
return false
}
+// CacheReaddir implements fs.DirentOperations.CacheReaddir.
+//
+// CacheReaddir returns false because entries change on master operations.
+func (superOperations) CacheReaddir() bool {
+ return false
+}
+
// ResetInodeMappings implements MountSourceOperations.ResetInodeMappings.
func (superOperations) ResetInodeMappings() {}
diff --git a/pkg/sentry/fs/tty/line_discipline.go b/pkg/sentry/fs/tty/line_discipline.go
index 20d29d130..7cc0eb409 100644
--- a/pkg/sentry/fs/tty/line_discipline.go
+++ b/pkg/sentry/fs/tty/line_discipline.go
@@ -19,12 +19,12 @@ import (
"sync"
"unicode/utf8"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/waiter"
)
const (
diff --git a/pkg/sentry/fs/tty/master.go b/pkg/sentry/fs/tty/master.go
index afdf44cd1..92ec1ca18 100644
--- a/pkg/sentry/fs/tty/master.go
+++ b/pkg/sentry/fs/tty/master.go
@@ -15,15 +15,15 @@
package tty
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/unimpl"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/unimpl"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// masterInodeOperations are the fs.InodeOperations for the master end of the
@@ -46,7 +46,7 @@ func newMasterInode(ctx context.Context, d *dirInodeOperations, owner fs.FileOwn
d: d,
}
- return fs.NewInode(iops, d.msrc, fs.StableAttr{
+ return fs.NewInode(ctx, iops, d.msrc, fs.StableAttr{
DeviceID: ptsDevice.DeviceID(),
// N.B. Linux always uses inode id 2 for ptmx. See
// fs/devpts/inode.c:mknod_ptmx.
@@ -144,7 +144,7 @@ func (mf *masterFileOperations) Write(ctx context.Context, _ *fs.File, src userm
}
// Ioctl implements fs.FileOperations.Ioctl.
-func (mf *masterFileOperations) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
+func (mf *masterFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
switch cmd := args[1].Uint(); cmd {
case linux.FIONREAD: // linux.FIONREAD == linux.TIOCINQ
// Get the number of bytes in the output queue read buffer.
diff --git a/pkg/sentry/fs/tty/queue.go b/pkg/sentry/fs/tty/queue.go
index 11fb92be3..231e4e6eb 100644
--- a/pkg/sentry/fs/tty/queue.go
+++ b/pkg/sentry/fs/tty/queue.go
@@ -17,13 +17,13 @@ package tty
import (
"sync"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/safemem"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// waitBufMaxBytes is the maximum size of a wait buffer. It is based on
diff --git a/pkg/sentry/fs/tty/slave.go b/pkg/sentry/fs/tty/slave.go
index 2abf32e57..e30266404 100644
--- a/pkg/sentry/fs/tty/slave.go
+++ b/pkg/sentry/fs/tty/slave.go
@@ -15,14 +15,14 @@
package tty
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// slaveInodeOperations are the fs.InodeOperations for the slave end of the
@@ -51,7 +51,7 @@ func newSlaveInode(ctx context.Context, d *dirInodeOperations, t *Terminal, owne
t: t,
}
- return fs.NewInode(iops, d.msrc, fs.StableAttr{
+ return fs.NewInode(ctx, iops, d.msrc, fs.StableAttr{
DeviceID: ptsDevice.DeviceID(),
// N.B. Linux always uses inode id = tty index + 3. See
// fs/devpts/inode.c:devpts_pty_new.
@@ -128,7 +128,7 @@ func (sf *slaveFileOperations) Write(ctx context.Context, _ *fs.File, src userme
}
// Ioctl implements fs.FileOperations.Ioctl.
-func (sf *slaveFileOperations) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
+func (sf *slaveFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
switch cmd := args[1].Uint(); cmd {
case linux.FIONREAD: // linux.FIONREAD == linux.TIOCINQ
// Get the number of bytes in the input queue read buffer.
diff --git a/pkg/sentry/fs/tty/terminal.go b/pkg/sentry/fs/tty/terminal.go
index 2b4160ba5..b7cecb2ed 100644
--- a/pkg/sentry/fs/tty/terminal.go
+++ b/pkg/sentry/fs/tty/terminal.go
@@ -15,9 +15,9 @@
package tty
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/refs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/refs"
+ "gvisor.dev/gvisor/pkg/sentry/context"
)
// Terminal is a pseudoterminal.
@@ -38,9 +38,11 @@ type Terminal struct {
func newTerminal(ctx context.Context, d *dirInodeOperations, n uint32) *Terminal {
termios := linux.DefaultSlaveTermios
- return &Terminal{
+ t := Terminal{
d: d,
n: n,
ld: newLineDiscipline(termios),
}
+ t.EnableLeakCheck("tty.Terminal")
+ return &t
}
diff --git a/pkg/sentry/fs/tty/tty_test.go b/pkg/sentry/fs/tty/tty_test.go
index d2e75a511..59f07ff8e 100644
--- a/pkg/sentry/fs/tty/tty_test.go
+++ b/pkg/sentry/fs/tty/tty_test.go
@@ -17,9 +17,9 @@ package tty
import (
"testing"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
func TestSimpleMasterToSlave(t *testing.T) {
diff --git a/pkg/sentry/hostcpu/BUILD b/pkg/sentry/hostcpu/BUILD
index b5067ae6d..f989f2f8b 100644
--- a/pkg/sentry/hostcpu/BUILD
+++ b/pkg/sentry/hostcpu/BUILD
@@ -8,7 +8,7 @@ go_library(
"getcpu_amd64.s",
"hostcpu.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/hostcpu",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/hostcpu",
visibility = ["//:sandbox"],
)
diff --git a/pkg/sentry/hostmm/BUILD b/pkg/sentry/hostmm/BUILD
index 1a4632a54..67831d5a1 100644
--- a/pkg/sentry/hostmm/BUILD
+++ b/pkg/sentry/hostmm/BUILD
@@ -8,7 +8,7 @@ go_library(
"cgroup.go",
"hostmm.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/hostmm",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/hostmm",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/fd",
diff --git a/pkg/sentry/hostmm/hostmm.go b/pkg/sentry/hostmm/hostmm.go
index 5432cada9..19335ca73 100644
--- a/pkg/sentry/hostmm/hostmm.go
+++ b/pkg/sentry/hostmm/hostmm.go
@@ -22,9 +22,9 @@ import (
"path"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/fd"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/fd"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// NotifyCurrentMemcgPressureCallback requests that f is called whenever the
diff --git a/pkg/sentry/inet/BUILD b/pkg/sentry/inet/BUILD
index e288d34e9..184b566d9 100644
--- a/pkg/sentry/inet/BUILD
+++ b/pkg/sentry/inet/BUILD
@@ -12,6 +12,6 @@ go_library(
"inet.go",
"test_stack.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/inet",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/inet",
deps = ["//pkg/sentry/context"],
)
diff --git a/pkg/sentry/inet/context.go b/pkg/sentry/inet/context.go
index 8550c4793..4eda7dd1f 100644
--- a/pkg/sentry/inet/context.go
+++ b/pkg/sentry/inet/context.go
@@ -15,7 +15,7 @@
package inet
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/context"
)
// contextID is the inet package's type for context.Context.Value keys.
diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD
index 04e375910..e61d39c82 100644
--- a/pkg/sentry/kernel/BUILD
+++ b/pkg/sentry/kernel/BUILD
@@ -31,7 +31,7 @@ go_template_instance(
go_template_instance(
name = "seqatomic_taskgoroutineschedinfo",
- out = "seqatomic_taskgoroutineschedinfo.go",
+ out = "seqatomic_taskgoroutineschedinfo_unsafe.go",
package = "kernel",
suffix = "TaskGoroutineSchedInfo",
template = "//third_party/gvsync:generic_seqatomic",
@@ -85,7 +85,7 @@ proto_library(
go_proto_library(
name = "uncaught_signal_go_proto",
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/uncaught_signal_go_proto",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/uncaught_signal_go_proto",
proto = ":uncaught_signal_proto",
visibility = ["//visibility:public"],
deps = ["//pkg/sentry/arch:registers_go_proto"],
@@ -96,7 +96,8 @@ go_library(
srcs = [
"abstract_socket_namespace.go",
"context.go",
- "fd_map.go",
+ "fd_table.go",
+ "fd_table_unsafe.go",
"fs_context.go",
"ipc_namespace.go",
"kernel.go",
@@ -111,7 +112,7 @@ go_library(
"ptrace_arm64.go",
"rseq.go",
"seccomp.go",
- "seqatomic_taskgoroutineschedinfo.go",
+ "seqatomic_taskgoroutineschedinfo_unsafe.go",
"session_list.go",
"sessions.go",
"signal.go",
@@ -147,11 +148,11 @@ go_library(
"vdso.go",
"version.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/kernel",
imports = [
- "gvisor.googlesource.com/gvisor/pkg/bpf",
- "gvisor.googlesource.com/gvisor/pkg/sentry/device",
- "gvisor.googlesource.com/gvisor/pkg/tcpip",
+ "gvisor.dev/gvisor/pkg/bpf",
+ "gvisor.dev/gvisor/pkg/sentry/device",
+ "gvisor.dev/gvisor/pkg/tcpip",
],
visibility = ["//:sandbox"],
deps = [
@@ -179,7 +180,6 @@ go_library(
"//pkg/sentry/kernel/auth",
"//pkg/sentry/kernel/epoll",
"//pkg/sentry/kernel/futex",
- "//pkg/sentry/kernel/kdefs",
"//pkg/sentry/kernel/sched",
"//pkg/sentry/kernel/semaphore",
"//pkg/sentry/kernel/shm",
@@ -214,7 +214,7 @@ go_test(
name = "kernel_test",
size = "small",
srcs = [
- "fd_map_test.go",
+ "fd_table_test.go",
"table_test.go",
"task_test.go",
"timekeeper_test.go",
@@ -223,9 +223,10 @@ go_test(
deps = [
"//pkg/abi",
"//pkg/sentry/arch",
+ "//pkg/sentry/context",
"//pkg/sentry/context/contexttest",
+ "//pkg/sentry/fs",
"//pkg/sentry/fs/filetest",
- "//pkg/sentry/kernel/kdefs",
"//pkg/sentry/kernel/sched",
"//pkg/sentry/limits",
"//pkg/sentry/pgalloc",
diff --git a/pkg/sentry/kernel/abstract_socket_namespace.go b/pkg/sentry/kernel/abstract_socket_namespace.go
index 5ce52e66c..244655b5c 100644
--- a/pkg/sentry/kernel/abstract_socket_namespace.go
+++ b/pkg/sentry/kernel/abstract_socket_namespace.go
@@ -18,8 +18,8 @@ import (
"sync"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/refs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport"
+ "gvisor.dev/gvisor/pkg/refs"
+ "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
)
// +stateify savable
diff --git a/pkg/sentry/kernel/auth/BUILD b/pkg/sentry/kernel/auth/BUILD
index abd4f2dae..1d00a6310 100644
--- a/pkg/sentry/kernel/auth/BUILD
+++ b/pkg/sentry/kernel/auth/BUILD
@@ -4,6 +4,17 @@ load("//tools/go_generics:defs.bzl", "go_template_instance")
load("//tools/go_stateify:defs.bzl", "go_library")
go_template_instance(
+ name = "atomicptr_credentials",
+ out = "atomicptr_credentials_unsafe.go",
+ package = "auth",
+ suffix = "Credentials",
+ template = "//third_party/gvsync:generic_atomicptr",
+ types = {
+ "Value": "Credentials",
+ },
+)
+
+go_template_instance(
name = "id_map_range",
out = "id_map_range.go",
package = "auth",
@@ -34,6 +45,7 @@ go_template_instance(
go_library(
name = "auth",
srcs = [
+ "atomicptr_credentials_unsafe.go",
"auth.go",
"capability_set.go",
"context.go",
@@ -45,7 +57,7 @@ go_library(
"id_map_set.go",
"user_namespace.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/auth",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
diff --git a/pkg/sentry/kernel/auth/capability_set.go b/pkg/sentry/kernel/auth/capability_set.go
index 7a0c967cd..fc8c6745c 100644
--- a/pkg/sentry/kernel/auth/capability_set.go
+++ b/pkg/sentry/kernel/auth/capability_set.go
@@ -15,8 +15,8 @@
package auth
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/bits"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/bits"
)
// A CapabilitySet is a set of capabilities implemented as a bitset. The zero
@@ -24,7 +24,7 @@ import (
type CapabilitySet uint64
// AllCapabilities is a CapabilitySet containing all valid capabilities.
-var AllCapabilities = CapabilitySetOf(linux.MaxCapability+1) - 1
+var AllCapabilities = CapabilitySetOf(linux.CAP_LAST_CAP+1) - 1
// CapabilitySetOf returns a CapabilitySet containing only the given
// capability.
diff --git a/pkg/sentry/kernel/auth/context.go b/pkg/sentry/kernel/auth/context.go
index 16d110610..5c0e7d6b6 100644
--- a/pkg/sentry/kernel/auth/context.go
+++ b/pkg/sentry/kernel/auth/context.go
@@ -15,7 +15,7 @@
package auth
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/context"
)
// contextID is the auth package's type for context.Context.Value keys.
diff --git a/pkg/sentry/kernel/auth/credentials.go b/pkg/sentry/kernel/auth/credentials.go
index 1511a0324..e057d2c6d 100644
--- a/pkg/sentry/kernel/auth/credentials.go
+++ b/pkg/sentry/kernel/auth/credentials.go
@@ -15,8 +15,8 @@
package auth
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// Credentials contains information required to authorize privileged operations
diff --git a/pkg/sentry/kernel/auth/id_map.go b/pkg/sentry/kernel/auth/id_map.go
index e5d6028d6..3d74bc610 100644
--- a/pkg/sentry/kernel/auth/id_map.go
+++ b/pkg/sentry/kernel/auth/id_map.go
@@ -15,9 +15,9 @@
package auth
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// MapFromKUID translates kuid, a UID in the root namespace, to a UID in ns.
diff --git a/pkg/sentry/kernel/auth/user_namespace.go b/pkg/sentry/kernel/auth/user_namespace.go
index a40dd668f..af28ccc65 100644
--- a/pkg/sentry/kernel/auth/user_namespace.go
+++ b/pkg/sentry/kernel/auth/user_namespace.go
@@ -18,7 +18,7 @@ import (
"math"
"sync"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// A UserNamespace represents a user namespace. See user_namespaces(7) for
diff --git a/pkg/sentry/kernel/context.go b/pkg/sentry/kernel/context.go
index a1a084eab..e3f5b0d83 100644
--- a/pkg/sentry/kernel/context.go
+++ b/pkg/sentry/kernel/context.go
@@ -15,8 +15,8 @@
package kernel
import (
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/context"
)
// contextID is the kernel package's type for context.Context.Value keys.
diff --git a/pkg/sentry/kernel/contexttest/BUILD b/pkg/sentry/kernel/contexttest/BUILD
index bfb2a0b73..bec13a3d9 100644
--- a/pkg/sentry/kernel/contexttest/BUILD
+++ b/pkg/sentry/kernel/contexttest/BUILD
@@ -6,7 +6,7 @@ go_library(
name = "contexttest",
testonly = 1,
srcs = ["contexttest.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/contexttest",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/sentry/context",
diff --git a/pkg/sentry/kernel/contexttest/contexttest.go b/pkg/sentry/kernel/contexttest/contexttest.go
index ae67e2a25..82f9d8922 100644
--- a/pkg/sentry/kernel/contexttest/contexttest.go
+++ b/pkg/sentry/kernel/contexttest/contexttest.go
@@ -19,11 +19,11 @@ package contexttest
import (
"testing"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/pgalloc"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
)
// Context returns a Context that may be used in tests. Uses ptrace as the
diff --git a/pkg/sentry/kernel/epoll/BUILD b/pkg/sentry/kernel/epoll/BUILD
index 3ac59e13e..f46c43128 100644
--- a/pkg/sentry/kernel/epoll/BUILD
+++ b/pkg/sentry/kernel/epoll/BUILD
@@ -22,7 +22,7 @@ go_library(
"epoll_list.go",
"epoll_state.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/epoll",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/epoll",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/refs",
@@ -30,7 +30,6 @@ go_library(
"//pkg/sentry/fs",
"//pkg/sentry/fs/anon",
"//pkg/sentry/fs/fsutil",
- "//pkg/sentry/kernel/kdefs",
"//pkg/sentry/usermem",
"//pkg/waiter",
],
diff --git a/pkg/sentry/kernel/epoll/epoll.go b/pkg/sentry/kernel/epoll/epoll.go
index 43ae22a5d..9c0a4e1b4 100644
--- a/pkg/sentry/kernel/epoll/epoll.go
+++ b/pkg/sentry/kernel/epoll/epoll.go
@@ -21,14 +21,13 @@ import (
"sync"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/refs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/anon"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/refs"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/anon"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// Event describes the event mask that was observed and the user data to be
@@ -61,7 +60,7 @@ const (
// +stateify savable
type FileIdentifier struct {
File *fs.File `state:"wait"`
- Fd kdefs.FD
+ Fd int32
}
// pollEntry holds all the state associated with an event poll entry, that is,
@@ -155,7 +154,7 @@ var cycleMu sync.Mutex
// NewEventPoll allocates and initializes a new event poll object.
func NewEventPoll(ctx context.Context) *fs.File {
// name matches fs/eventpoll.c:epoll_create1.
- dirent := fs.NewDirent(anon.NewInode(ctx), fmt.Sprintf("anon_inode:[eventpoll]"))
+ dirent := fs.NewDirent(ctx, anon.NewInode(ctx), fmt.Sprintf("anon_inode:[eventpoll]"))
// Release the initial dirent reference after NewFile takes a reference.
defer dirent.DecRef()
return fs.NewFile(ctx, dirent, fs.FileFlags{}, &EventPoll{
diff --git a/pkg/sentry/kernel/epoll/epoll_state.go b/pkg/sentry/kernel/epoll/epoll_state.go
index 4c3c38f9e..a0d35d350 100644
--- a/pkg/sentry/kernel/epoll/epoll_state.go
+++ b/pkg/sentry/kernel/epoll/epoll_state.go
@@ -15,8 +15,8 @@
package epoll
import (
- "gvisor.googlesource.com/gvisor/pkg/refs"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/refs"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// afterLoad is invoked by stateify.
diff --git a/pkg/sentry/kernel/epoll/epoll_test.go b/pkg/sentry/kernel/epoll/epoll_test.go
index 49b781b69..4a20d4c82 100644
--- a/pkg/sentry/kernel/epoll/epoll_test.go
+++ b/pkg/sentry/kernel/epoll/epoll_test.go
@@ -17,9 +17,9 @@ package epoll
import (
"testing"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/filetest"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/fs/filetest"
+ "gvisor.dev/gvisor/pkg/waiter"
)
func TestFileDestroyed(t *testing.T) {
diff --git a/pkg/sentry/kernel/eventfd/BUILD b/pkg/sentry/kernel/eventfd/BUILD
index f2f1a1223..1c5f979d4 100644
--- a/pkg/sentry/kernel/eventfd/BUILD
+++ b/pkg/sentry/kernel/eventfd/BUILD
@@ -5,7 +5,7 @@ load("//tools/go_stateify:defs.bzl", "go_library", "go_test")
go_library(
name = "eventfd",
srcs = ["eventfd.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/eventfd",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/eventfd",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
diff --git a/pkg/sentry/kernel/eventfd/eventfd.go b/pkg/sentry/kernel/eventfd/eventfd.go
index fe474cbf0..12f0d429b 100644
--- a/pkg/sentry/kernel/eventfd/eventfd.go
+++ b/pkg/sentry/kernel/eventfd/eventfd.go
@@ -21,15 +21,15 @@ import (
"sync"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/fdnotifier"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/anon"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/fdnotifier"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/anon"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// EventOperations represents an event with the semantics of Linux's file-based event
@@ -68,7 +68,7 @@ type EventOperations struct {
// New creates a new event object with the supplied initial value and mode.
func New(ctx context.Context, initVal uint64, semMode bool) *fs.File {
// name matches fs/eventfd.c:eventfd_file_create.
- dirent := fs.NewDirent(anon.NewInode(ctx), "anon_inode:[eventfd]")
+ dirent := fs.NewDirent(ctx, anon.NewInode(ctx), "anon_inode:[eventfd]")
// Release the initial dirent reference after NewFile takes a reference.
defer dirent.DecRef()
return fs.NewFile(ctx, dirent, fs.FileFlags{Read: true, Write: true}, &EventOperations{
diff --git a/pkg/sentry/kernel/eventfd/eventfd_test.go b/pkg/sentry/kernel/eventfd/eventfd_test.go
index 1159638e5..018c7f3ef 100644
--- a/pkg/sentry/kernel/eventfd/eventfd_test.go
+++ b/pkg/sentry/kernel/eventfd/eventfd_test.go
@@ -17,9 +17,9 @@ package eventfd
import (
"testing"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/waiter"
)
func TestEventfd(t *testing.T) {
diff --git a/pkg/sentry/kernel/fasync/BUILD b/pkg/sentry/kernel/fasync/BUILD
index 59b4a49e1..5eddca115 100644
--- a/pkg/sentry/kernel/fasync/BUILD
+++ b/pkg/sentry/kernel/fasync/BUILD
@@ -5,7 +5,7 @@ load("//tools/go_stateify:defs.bzl", "go_library")
go_library(
name = "fasync",
srcs = ["fasync.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/fasync",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/fasync",
visibility = ["//:sandbox"],
deps = [
"//pkg/abi/linux",
diff --git a/pkg/sentry/kernel/fasync/fasync.go b/pkg/sentry/kernel/fasync/fasync.go
index 84cd08501..6b0bb0324 100644
--- a/pkg/sentry/kernel/fasync/fasync.go
+++ b/pkg/sentry/kernel/fasync/fasync.go
@@ -18,11 +18,11 @@ package fasync
import (
"sync"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// New creates a new FileAsync.
@@ -34,9 +34,23 @@ func New() fs.FileAsync {
//
// +stateify savable
type FileAsync struct {
- mu sync.Mutex `state:"nosave"`
- e waiter.Entry
- requester *auth.Credentials
+ // e is immutable after first use (which is protected by mu below).
+ e waiter.Entry
+
+ // regMu protects registeration and unregistration actions on e.
+ //
+ // regMu must be held while registration decisions are being made
+ // through the registration action itself.
+ //
+ // Lock ordering: regMu, mu.
+ regMu sync.Mutex `state:"nosave"`
+
+ // mu protects all following fields.
+ //
+ // Lock ordering: e.mu, mu.
+ mu sync.Mutex `state:"nosave"`
+ requester *auth.Credentials
+ registered bool
// Only one of the following is allowed to be non-nil.
recipientPG *kernel.ProcessGroup
@@ -47,7 +61,7 @@ type FileAsync struct {
// Callback sends a signal.
func (a *FileAsync) Callback(e *waiter.Entry) {
a.mu.Lock()
- if a.e.Callback == nil {
+ if !a.registered {
a.mu.Unlock()
return
}
@@ -80,14 +94,21 @@ func (a *FileAsync) Callback(e *waiter.Entry) {
//
// The file must not be currently registered.
func (a *FileAsync) Register(w waiter.Waitable) {
+ a.regMu.Lock()
+ defer a.regMu.Unlock()
a.mu.Lock()
- defer a.mu.Unlock()
- if a.e.Callback != nil {
+ if a.registered {
+ a.mu.Unlock()
panic("registering already registered file")
}
- a.e.Callback = a
+ if a.e.Callback == nil {
+ a.e.Callback = a
+ }
+ a.registered = true
+
+ a.mu.Unlock()
w.EventRegister(&a.e, waiter.EventIn|waiter.EventOut|waiter.EventErr|waiter.EventHUp)
}
@@ -95,15 +116,19 @@ func (a *FileAsync) Register(w waiter.Waitable) {
//
// The file must be currently registered.
func (a *FileAsync) Unregister(w waiter.Waitable) {
+ a.regMu.Lock()
+ defer a.regMu.Unlock()
a.mu.Lock()
- defer a.mu.Unlock()
- if a.e.Callback == nil {
+ if !a.registered {
+ a.mu.Unlock()
panic("unregistering unregistered file")
}
+ a.registered = false
+
+ a.mu.Unlock()
w.EventUnregister(&a.e)
- a.e.Callback = nil
}
// Owner returns who is currently getting signals. All return values will be
diff --git a/pkg/sentry/kernel/fd_map.go b/pkg/sentry/kernel/fd_map.go
deleted file mode 100644
index c5636d233..000000000
--- a/pkg/sentry/kernel/fd_map.go
+++ /dev/null
@@ -1,364 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package kernel
-
-import (
- "bytes"
- "fmt"
- "sort"
- "sync"
- "sync/atomic"
- "syscall"
-
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/refs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/lock"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/limits"
-)
-
-// FDs is an ordering of FD's that can be made stable.
-type FDs []kdefs.FD
-
-func (f FDs) Len() int {
- return len(f)
-}
-
-func (f FDs) Swap(i, j int) {
- f[i], f[j] = f[j], f[i]
-}
-
-func (f FDs) Less(i, j int) bool {
- return f[i] < f[j]
-}
-
-// FDFlags define flags for an individual descriptor.
-//
-// +stateify savable
-type FDFlags struct {
- // CloseOnExec indicates the descriptor should be closed on exec.
- CloseOnExec bool
-}
-
-// ToLinuxFileFlags converts a kernel.FDFlags object to a Linux file flags
-// representation.
-func (f FDFlags) ToLinuxFileFlags() (mask uint) {
- if f.CloseOnExec {
- mask |= linux.O_CLOEXEC
- }
- return
-}
-
-// ToLinuxFDFlags converts a kernel.FDFlags object to a Linux descriptor flags
-// representation.
-func (f FDFlags) ToLinuxFDFlags() (mask uint) {
- if f.CloseOnExec {
- mask |= linux.FD_CLOEXEC
- }
- return
-}
-
-// descriptor holds the details about a file descriptor, namely a pointer the
-// file itself and the descriptor flags.
-//
-// +stateify savable
-type descriptor struct {
- file *fs.File
- flags FDFlags
-}
-
-// FDMap is used to manage File references and flags.
-//
-// +stateify savable
-type FDMap struct {
- refs.AtomicRefCount
- k *Kernel
- files map[kdefs.FD]descriptor
- mu sync.RWMutex `state:"nosave"`
- uid uint64
-}
-
-// ID returns a unique identifier for this FDMap.
-func (f *FDMap) ID() uint64 {
- return f.uid
-}
-
-// NewFDMap allocates a new FDMap that may be used by tasks in k.
-func (k *Kernel) NewFDMap() *FDMap {
- return &FDMap{
- k: k,
- files: make(map[kdefs.FD]descriptor),
- uid: atomic.AddUint64(&k.fdMapUids, 1),
- }
-}
-
-// destroy removes all of the file descriptors from the map.
-func (f *FDMap) destroy() {
- f.RemoveIf(func(*fs.File, FDFlags) bool {
- return true
- })
-}
-
-// DecRef implements RefCounter.DecRef with destructor f.destroy.
-func (f *FDMap) DecRef() {
- f.DecRefWithDestructor(f.destroy)
-}
-
-// Size returns the number of file descriptor slots currently allocated.
-func (f *FDMap) Size() int {
- f.mu.RLock()
- defer f.mu.RUnlock()
-
- return len(f.files)
-}
-
-// String is a stringer for FDMap.
-func (f *FDMap) String() string {
- f.mu.RLock()
- defer f.mu.RUnlock()
-
- var b bytes.Buffer
- for k, v := range f.files {
- n, _ := v.file.Dirent.FullName(nil /* root */)
- b.WriteString(fmt.Sprintf("\tfd:%d => name %s\n", k, n))
- }
- return b.String()
-}
-
-// NewFDFrom allocates a new FD guaranteed to be the lowest number available
-// greater than or equal to from. This property is important as Unix programs
-// tend to count on this allocation order.
-func (f *FDMap) NewFDFrom(fd kdefs.FD, file *fs.File, flags FDFlags, limitSet *limits.LimitSet) (kdefs.FD, error) {
- if fd < 0 {
- // Don't accept negative FDs.
- return 0, syscall.EINVAL
- }
-
- f.mu.Lock()
- defer f.mu.Unlock()
-
- // Finds the lowest fd not in the handles map.
- lim := limitSet.Get(limits.NumberOfFiles)
- for i := fd; lim.Cur == limits.Infinity || i < kdefs.FD(lim.Cur); i++ {
- if _, ok := f.files[i]; !ok {
- file.IncRef()
- f.files[i] = descriptor{file, flags}
- return i, nil
- }
- }
-
- return -1, syscall.EMFILE
-}
-
-// NewFDAt sets the file reference for the given FD. If there is an
-// active reference for that FD, the ref count for that existing reference
-// is decremented.
-func (f *FDMap) NewFDAt(fd kdefs.FD, file *fs.File, flags FDFlags, limitSet *limits.LimitSet) error {
- if fd < 0 {
- // Don't accept negative FDs.
- return syscall.EBADF
- }
-
- // In this one case we do not do a defer of the Unlock. The
- // reason is that we must have done all the work needed for
- // discarding any old open file before we return to the
- // caller. In other words, the DecRef(), below, must have
- // completed by the time we return to the caller to ensure
- // side effects are, in fact, effected. A classic example is
- // dup2(fd1, fd2); if fd2 was already open, it must be closed,
- // and we don't want to resume the caller until it is; we have
- // to block on the DecRef(). Hence we can not just do a 'go
- // oldfile.DecRef()', since there would be no guarantee that
- // it would be done before we the caller resumed. Since we
- // must wait for the DecRef() to finish, and that could take
- // time, it's best to first call f.muUnlock beore so we are
- // not blocking other uses of this FDMap on the DecRef() call.
- f.mu.Lock()
- oldDesc, oldExists := f.files[fd]
- lim := limitSet.Get(limits.NumberOfFiles).Cur
- // if we're closing one then the effective limit is one
- // more than the actual limit.
- if oldExists && lim != limits.Infinity {
- lim++
- }
- if lim != limits.Infinity && fd >= kdefs.FD(lim) {
- f.mu.Unlock()
- return syscall.EMFILE
- }
-
- file.IncRef()
- f.files[fd] = descriptor{file, flags}
- f.mu.Unlock()
-
- if oldExists {
- oldDesc.file.DecRef()
- }
- return nil
-}
-
-// SetFlags sets the flags for the given file descriptor, if it is valid.
-func (f *FDMap) SetFlags(fd kdefs.FD, flags FDFlags) {
- f.mu.Lock()
- defer f.mu.Unlock()
-
- desc, ok := f.files[fd]
- if !ok {
- return
- }
-
- f.files[fd] = descriptor{desc.file, flags}
-}
-
-// GetDescriptor returns a reference to the file and the flags for the FD. It
-// bumps its reference count as well. It returns nil if there is no File
-// for the FD, i.e. if the FD is invalid. The caller must use DecRef
-// when they are done.
-func (f *FDMap) GetDescriptor(fd kdefs.FD) (*fs.File, FDFlags) {
- f.mu.RLock()
- defer f.mu.RUnlock()
-
- if desc, ok := f.files[fd]; ok {
- desc.file.IncRef()
- return desc.file, desc.flags
- }
- return nil, FDFlags{}
-}
-
-// GetFile returns a reference to the File for the FD and bumps
-// its reference count as well. It returns nil if there is no File
-// for the FD, i.e. if the FD is invalid. The caller must use DecRef
-// when they are done.
-func (f *FDMap) GetFile(fd kdefs.FD) *fs.File {
- f.mu.RLock()
- if desc, ok := f.files[fd]; ok {
- desc.file.IncRef()
- f.mu.RUnlock()
- return desc.file
- }
- f.mu.RUnlock()
- return nil
-}
-
-// fds returns an ordering of FDs.
-func (f *FDMap) fds() FDs {
- fds := make(FDs, 0, len(f.files))
- for fd := range f.files {
- fds = append(fds, fd)
- }
- sort.Sort(fds)
- return fds
-}
-
-// GetFDs returns a list of valid fds.
-func (f *FDMap) GetFDs() FDs {
- f.mu.RLock()
- defer f.mu.RUnlock()
- return f.fds()
-}
-
-// GetRefs returns a stable slice of references to all files and bumps the
-// reference count on each. The caller must use DecRef on each reference when
-// they're done using the slice.
-func (f *FDMap) GetRefs() []*fs.File {
- f.mu.RLock()
- defer f.mu.RUnlock()
-
- fds := f.fds()
- fs := make([]*fs.File, 0, len(fds))
- for _, fd := range fds {
- desc := f.files[fd]
- desc.file.IncRef()
- fs = append(fs, desc.file)
- }
- return fs
-}
-
-// Fork returns an independent FDMap pointing to the same descriptors.
-func (f *FDMap) Fork() *FDMap {
- f.mu.RLock()
- defer f.mu.RUnlock()
-
- clone := f.k.NewFDMap()
-
- // Grab a extra reference for every file.
- for fd, desc := range f.files {
- desc.file.IncRef()
- clone.files[fd] = desc
- }
-
- // That's it!
- return clone
-}
-
-// unlock releases all file locks held by this FDMap's uid. Must only be
-// called on a non-nil *fs.File.
-func (f *FDMap) unlock(file *fs.File) {
- id := lock.UniqueID(f.ID())
- file.Dirent.Inode.LockCtx.Posix.UnlockRegion(id, lock.LockRange{0, lock.LockEOF})
-}
-
-// inotifyFileClose generates the appropriate inotify events for f being closed.
-func inotifyFileClose(f *fs.File) {
- var ev uint32
- d := f.Dirent
-
- if fs.IsDir(d.Inode.StableAttr) {
- ev |= linux.IN_ISDIR
- }
-
- if f.Flags().Write {
- ev |= linux.IN_CLOSE_WRITE
- } else {
- ev |= linux.IN_CLOSE_NOWRITE
- }
-
- d.InotifyEvent(ev, 0)
-}
-
-// Remove removes an FD from the FDMap, and returns (File, true) if a File
-// one was found. Callers are expected to decrement the reference count on
-// the File. Otherwise returns (nil, false).
-func (f *FDMap) Remove(fd kdefs.FD) (*fs.File, bool) {
- f.mu.Lock()
- desc := f.files[fd]
- delete(f.files, fd)
- f.mu.Unlock()
- if desc.file != nil {
- f.unlock(desc.file)
- inotifyFileClose(desc.file)
- return desc.file, true
- }
- return nil, false
-}
-
-// RemoveIf removes all FDs where cond is true.
-func (f *FDMap) RemoveIf(cond func(*fs.File, FDFlags) bool) {
- var removed []*fs.File
- f.mu.Lock()
- for fd, desc := range f.files {
- if desc.file != nil && cond(desc.file, desc.flags) {
- delete(f.files, fd)
- removed = append(removed, desc.file)
- }
- }
- f.mu.Unlock()
-
- for _, file := range removed {
- f.unlock(file)
- inotifyFileClose(file)
- file.DecRef()
- }
-}
diff --git a/pkg/sentry/kernel/fd_map_test.go b/pkg/sentry/kernel/fd_map_test.go
deleted file mode 100644
index 22db4c7cf..000000000
--- a/pkg/sentry/kernel/fd_map_test.go
+++ /dev/null
@@ -1,136 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package kernel
-
-import (
- "testing"
-
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/filetest"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/limits"
-)
-
-const (
- // maxFD is the maximum FD to try to create in the map.
- // This number of open files has been seen in the wild.
- maxFD = 2 * 1024
-)
-
-func newTestFDMap() *FDMap {
- return &FDMap{
- files: make(map[kdefs.FD]descriptor),
- }
-}
-
-// TestFDMapMany allocates maxFD FDs, i.e. maxes out the FDMap,
-// until there is no room, then makes sure that NewFDAt works
-// and also that if we remove one and add one that works too.
-func TestFDMapMany(t *testing.T) {
- file := filetest.NewTestFile(t)
- limitSet := limits.NewLimitSet()
- limitSet.Set(limits.NumberOfFiles, limits.Limit{maxFD, maxFD}, true /* privileged */)
-
- f := newTestFDMap()
- for i := 0; i < maxFD; i++ {
- if _, err := f.NewFDFrom(0, file, FDFlags{}, limitSet); err != nil {
- t.Fatalf("Allocated %v FDs but wanted to allocate %v", i, maxFD)
- }
- }
-
- if _, err := f.NewFDFrom(0, file, FDFlags{}, limitSet); err == nil {
- t.Fatalf("f.NewFDFrom(0, r) in full map: got nil, wanted error")
- }
-
- if err := f.NewFDAt(1, file, FDFlags{}, limitSet); err != nil {
- t.Fatalf("f.NewFDAt(1, r, FDFlags{}): got %v, wanted nil", err)
- }
-}
-
-// TestFDMap does a set of simple tests to make sure simple adds,
-// removes, GetRefs, and DecRefs work. The ordering is just weird
-// enough that a table-driven approach seemed clumsy.
-func TestFDMap(t *testing.T) {
- file := filetest.NewTestFile(t)
- limitSet := limits.NewLimitSet()
- limitSet.Set(limits.NumberOfFiles, limits.Limit{1, maxFD}, true /* privileged */)
-
- f := newTestFDMap()
- if _, err := f.NewFDFrom(0, file, FDFlags{}, limitSet); err != nil {
- t.Fatalf("Adding an FD to an empty 1-size map: got %v, want nil", err)
- }
-
- if _, err := f.NewFDFrom(0, file, FDFlags{}, limitSet); err == nil {
- t.Fatalf("Adding an FD to a filled 1-size map: got nil, wanted an error")
- }
-
- largeLimit := limits.Limit{maxFD, maxFD}
- limitSet.Set(limits.NumberOfFiles, largeLimit, true /* privileged */)
-
- if fd, err := f.NewFDFrom(0, file, FDFlags{}, limitSet); err != nil {
- t.Fatalf("Adding an FD to a resized map: got %v, want nil", err)
- } else if fd != kdefs.FD(1) {
- t.Fatalf("Added an FD to a resized map: got %v, want 1", fd)
- }
-
- if err := f.NewFDAt(1, file, FDFlags{}, limitSet); err != nil {
- t.Fatalf("Replacing FD 1 via f.NewFDAt(1, r, FDFlags{}): got %v, wanted nil", err)
- }
-
- if err := f.NewFDAt(maxFD+1, file, FDFlags{}, limitSet); err == nil {
- t.Fatalf("Using an FD that was too large via f.NewFDAt(%v, r, FDFlags{}): got nil, wanted an error", maxFD+1)
- }
-
- if ref := f.GetFile(1); ref == nil {
- t.Fatalf("f.GetFile(1): got nil, wanted %v", file)
- }
-
- if ref := f.GetFile(2); ref != nil {
- t.Fatalf("f.GetFile(2): got a %v, wanted nil", ref)
- }
-
- ref, ok := f.Remove(1)
- if !ok {
- t.Fatalf("f.Remove(1) for an existing FD: failed, want success")
- }
- ref.DecRef()
-
- if ref, ok := f.Remove(1); ok {
- ref.DecRef()
- t.Fatalf("r.Remove(1) for a removed FD: got success, want failure")
- }
-
-}
-
-func TestDescriptorFlags(t *testing.T) {
- file := filetest.NewTestFile(t)
- f := newTestFDMap()
- limitSet := limits.NewLimitSet()
- limitSet.Set(limits.NumberOfFiles, limits.Limit{maxFD, maxFD}, true /* privileged */)
-
- origFlags := FDFlags{CloseOnExec: true}
-
- if err := f.NewFDAt(2, file, origFlags, limitSet); err != nil {
- t.Fatalf("f.NewFDAt(2, r, FDFlags{}): got %v, wanted nil", err)
- }
-
- newFile, newFlags := f.GetDescriptor(2)
- if newFile == nil {
- t.Fatalf("f.GetFile(2): got a %v, wanted nil", newFile)
- }
-
- if newFlags != origFlags {
- t.Fatalf("new File flags %+v don't match original %+v", newFlags, origFlags)
- }
-}
diff --git a/pkg/sentry/kernel/fd_table.go b/pkg/sentry/kernel/fd_table.go
new file mode 100644
index 000000000..1f3a57dc1
--- /dev/null
+++ b/pkg/sentry/kernel/fd_table.go
@@ -0,0 +1,380 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package kernel
+
+import (
+ "bytes"
+ "fmt"
+ "math"
+ "sync"
+ "sync/atomic"
+ "syscall"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/refs"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/lock"
+ "gvisor.dev/gvisor/pkg/sentry/limits"
+)
+
+// FDFlags define flags for an individual descriptor.
+//
+// +stateify savable
+type FDFlags struct {
+ // CloseOnExec indicates the descriptor should be closed on exec.
+ CloseOnExec bool
+}
+
+// ToLinuxFileFlags converts a kernel.FDFlags object to a Linux file flags
+// representation.
+func (f FDFlags) ToLinuxFileFlags() (mask uint) {
+ if f.CloseOnExec {
+ mask |= linux.O_CLOEXEC
+ }
+ return
+}
+
+// ToLinuxFDFlags converts a kernel.FDFlags object to a Linux descriptor flags
+// representation.
+func (f FDFlags) ToLinuxFDFlags() (mask uint) {
+ if f.CloseOnExec {
+ mask |= linux.FD_CLOEXEC
+ }
+ return
+}
+
+// descriptor holds the details about a file descriptor, namely a pointer to
+// the file itself and the descriptor flags.
+//
+// Note that this is immutable and can only be changed via operations on the
+// descriptorTable.
+//
+// +stateify savable
+type descriptor struct {
+ file *fs.File
+ flags FDFlags
+}
+
+// FDTable is used to manage File references and flags.
+//
+// +stateify savable
+type FDTable struct {
+ refs.AtomicRefCount
+ k *Kernel
+
+ // uid is a unique identifier.
+ uid uint64
+
+ // mu protects below.
+ mu sync.Mutex `state:"nosave"`
+
+ // used contains the number of non-nil entries.
+ used int32
+
+ // descriptorTable holds descriptors.
+ descriptorTable `state:".(map[int32]descriptor)"`
+}
+
+func (f *FDTable) saveDescriptorTable() map[int32]descriptor {
+ m := make(map[int32]descriptor)
+ f.forEach(func(fd int32, file *fs.File, flags FDFlags) {
+ m[fd] = descriptor{
+ file: file,
+ flags: flags,
+ }
+ })
+ return m
+}
+
+func (f *FDTable) loadDescriptorTable(m map[int32]descriptor) {
+ f.init() // Initialize table.
+ for fd, d := range m {
+ f.set(fd, d.file, d.flags)
+
+ // Note that we do _not_ need to acquire a extra table
+ // reference here. The table reference will already be
+ // accounted for in the file, so we drop the reference taken by
+ // set above.
+ d.file.DecRef()
+ }
+}
+
+// drop drops the table reference.
+func (f *FDTable) drop(file *fs.File) {
+ // Release locks.
+ file.Dirent.Inode.LockCtx.Posix.UnlockRegion(lock.UniqueID(f.uid), lock.LockRange{0, lock.LockEOF})
+
+ // Send inotify events.
+ d := file.Dirent
+ var ev uint32
+ if fs.IsDir(d.Inode.StableAttr) {
+ ev |= linux.IN_ISDIR
+ }
+ if file.Flags().Write {
+ ev |= linux.IN_CLOSE_WRITE
+ } else {
+ ev |= linux.IN_CLOSE_NOWRITE
+ }
+ d.InotifyEvent(ev, 0)
+
+ // Drop the table reference.
+ file.DecRef()
+}
+
+// ID returns a unique identifier for this FDTable.
+func (f *FDTable) ID() uint64 {
+ return f.uid
+}
+
+// NewFDTable allocates a new FDTable that may be used by tasks in k.
+func (k *Kernel) NewFDTable() *FDTable {
+ f := &FDTable{
+ k: k,
+ uid: atomic.AddUint64(&k.fdMapUids, 1),
+ }
+ f.init()
+ return f
+}
+
+// destroy removes all of the file descriptors from the map.
+func (f *FDTable) destroy() {
+ f.RemoveIf(func(*fs.File, FDFlags) bool {
+ return true
+ })
+}
+
+// DecRef implements RefCounter.DecRef with destructor f.destroy.
+func (f *FDTable) DecRef() {
+ f.DecRefWithDestructor(f.destroy)
+}
+
+// Size returns the number of file descriptor slots currently allocated.
+func (f *FDTable) Size() int {
+ size := atomic.LoadInt32(&f.used)
+ return int(size)
+}
+
+// forEach iterates over all non-nil files.
+//
+// It is the caller's responsibility to acquire an appropriate lock.
+func (f *FDTable) forEach(fn func(fd int32, file *fs.File, flags FDFlags)) {
+ fd := int32(0)
+ for {
+ file, flags, ok := f.get(fd)
+ if !ok {
+ break
+ }
+ if file != nil {
+ if !file.TryIncRef() {
+ continue // Race caught.
+ }
+ fn(int32(fd), file, flags)
+ file.DecRef()
+ }
+ fd++
+ }
+}
+
+// String is a stringer for FDTable.
+func (f *FDTable) String() string {
+ var b bytes.Buffer
+ f.forEach(func(fd int32, file *fs.File, flags FDFlags) {
+ n, _ := file.Dirent.FullName(nil /* root */)
+ b.WriteString(fmt.Sprintf("\tfd:%d => name %s\n", fd, n))
+ })
+ return b.String()
+}
+
+// NewFDs allocates new FDs guaranteed to be the lowest number available
+// greater than or equal to the fd parameter. All files will share the set
+// flags. Success is guaranteed to be all or none.
+func (f *FDTable) NewFDs(ctx context.Context, fd int32, files []*fs.File, flags FDFlags) (fds []int32, err error) {
+ if fd < 0 {
+ // Don't accept negative FDs.
+ return nil, syscall.EINVAL
+ }
+
+ // Default limit.
+ end := int32(math.MaxInt32)
+
+ // Ensure we don't get past the provided limit.
+ if limitSet := limits.FromContext(ctx); limitSet != nil {
+ lim := limitSet.Get(limits.NumberOfFiles)
+ if lim.Cur != limits.Infinity {
+ end = int32(lim.Cur)
+ }
+ if fd >= end {
+ return nil, syscall.EMFILE
+ }
+ }
+
+ f.mu.Lock()
+ defer f.mu.Unlock()
+
+ // Install all entries.
+ for i := fd; i < end && len(fds) < len(files); i++ {
+ if d, _, _ := f.get(i); d == nil {
+ f.set(i, files[len(fds)], flags) // Set the descriptor.
+ fds = append(fds, i) // Record the file descriptor.
+ }
+ }
+
+ // Failure? Unwind existing FDs.
+ if len(fds) < len(files) {
+ for _, i := range fds {
+ f.set(i, nil, FDFlags{}) // Zap entry.
+ }
+ return nil, syscall.EMFILE
+ }
+
+ return fds, nil
+}
+
+// NewFDAt sets the file reference for the given FD. If there is an active
+// reference for that FD, the ref count for that existing reference is
+// decremented.
+func (f *FDTable) NewFDAt(ctx context.Context, fd int32, file *fs.File, flags FDFlags) error {
+ if fd < 0 {
+ // Don't accept negative FDs.
+ return syscall.EBADF
+ }
+
+ f.mu.Lock()
+ defer f.mu.Unlock()
+
+ // Check the limit for the provided file.
+ if limitSet := limits.FromContext(ctx); limitSet != nil {
+ if lim := limitSet.Get(limits.NumberOfFiles); lim.Cur != limits.Infinity && uint64(fd) >= lim.Cur {
+ return syscall.EMFILE
+ }
+ }
+
+ // Install the entry.
+ f.set(fd, file, flags)
+ return nil
+}
+
+// SetFlags sets the flags for the given file descriptor.
+//
+// True is returned iff flags were changed.
+func (f *FDTable) SetFlags(fd int32, flags FDFlags) error {
+ if fd < 0 {
+ // Don't accept negative FDs.
+ return syscall.EBADF
+ }
+
+ f.mu.Lock()
+ defer f.mu.Unlock()
+
+ file, _, _ := f.get(fd)
+ if file == nil {
+ // No file found.
+ return syscall.EBADF
+ }
+
+ // Update the flags.
+ f.set(fd, file, flags)
+ return nil
+}
+
+// Get returns a reference to the file and the flags for the FD or nil if no
+// file is defined for the given fd.
+//
+// N.B. Callers are required to use DecRef when they are done.
+//
+//go:nosplit
+func (f *FDTable) Get(fd int32) (*fs.File, FDFlags) {
+ if fd < 0 {
+ return nil, FDFlags{}
+ }
+
+ for {
+ file, flags, _ := f.get(fd)
+ if file != nil {
+ if !file.TryIncRef() {
+ continue // Race caught.
+ }
+ // Reference acquired.
+ return file, flags
+ }
+ // No file available.
+ return nil, FDFlags{}
+ }
+}
+
+// GetFDs returns a list of valid fds.
+func (f *FDTable) GetFDs() []int32 {
+ fds := make([]int32, 0, f.used)
+ f.forEach(func(fd int32, file *fs.File, flags FDFlags) {
+ fds = append(fds, fd)
+ })
+ return fds
+}
+
+// GetRefs returns a stable slice of references to all files and bumps the
+// reference count on each. The caller must use DecRef on each reference when
+// they're done using the slice.
+func (f *FDTable) GetRefs() []*fs.File {
+ files := make([]*fs.File, 0, f.Size())
+ f.forEach(func(_ int32, file *fs.File, flags FDFlags) {
+ file.IncRef() // Acquire a reference for caller.
+ files = append(files, file)
+ })
+ return files
+}
+
+// Fork returns an independent FDTable.
+func (f *FDTable) Fork() *FDTable {
+ clone := f.k.NewFDTable()
+
+ f.forEach(func(fd int32, file *fs.File, flags FDFlags) {
+ // The set function here will acquire an appropriate table
+ // reference for the clone. We don't need anything else.
+ clone.set(fd, file, flags)
+ })
+ return clone
+}
+
+// Remove removes an FD from and returns a non-file iff successful.
+//
+// N.B. Callers are required to use DecRef when they are done.
+func (f *FDTable) Remove(fd int32) *fs.File {
+ if fd < 0 {
+ return nil
+ }
+
+ f.mu.Lock()
+ defer f.mu.Unlock()
+
+ orig, _, _ := f.get(fd)
+ if orig != nil {
+ orig.IncRef() // Reference for caller.
+ f.set(fd, nil, FDFlags{}) // Zap entry.
+ }
+ return orig
+}
+
+// RemoveIf removes all FDs where cond is true.
+func (f *FDTable) RemoveIf(cond func(*fs.File, FDFlags) bool) {
+ f.mu.Lock()
+ defer f.mu.Unlock()
+
+ f.forEach(func(fd int32, file *fs.File, flags FDFlags) {
+ if cond(file, flags) {
+ f.set(fd, nil, FDFlags{}) // Clear from table.
+ }
+ })
+}
diff --git a/pkg/sentry/kernel/fd_table_test.go b/pkg/sentry/kernel/fd_table_test.go
new file mode 100644
index 000000000..2413788e7
--- /dev/null
+++ b/pkg/sentry/kernel/fd_table_test.go
@@ -0,0 +1,192 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package kernel
+
+import (
+ "runtime"
+ "sync"
+ "testing"
+
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/filetest"
+ "gvisor.dev/gvisor/pkg/sentry/limits"
+)
+
+const (
+ // maxFD is the maximum FD to try to create in the map.
+ //
+ // This number of open files has been seen in the wild.
+ maxFD = 2 * 1024
+)
+
+func runTest(t testing.TB, fn func(ctx context.Context, fdTable *FDTable, file *fs.File, limitSet *limits.LimitSet)) {
+ t.Helper() // Don't show in stacks.
+
+ // Create the limits and context.
+ limitSet := limits.NewLimitSet()
+ limitSet.Set(limits.NumberOfFiles, limits.Limit{maxFD, maxFD}, true)
+ ctx := contexttest.WithLimitSet(contexttest.Context(t), limitSet)
+
+ // Create a test file.;
+ file := filetest.NewTestFile(t)
+
+ // Create the table.
+ fdTable := new(FDTable)
+ fdTable.init()
+
+ // Run the test.
+ fn(ctx, fdTable, file, limitSet)
+}
+
+// TestFDTableMany allocates maxFD FDs, i.e. maxes out the FDTable, until there
+// is no room, then makes sure that NewFDAt works and also that if we remove
+// one and add one that works too.
+func TestFDTableMany(t *testing.T) {
+ runTest(t, func(ctx context.Context, fdTable *FDTable, file *fs.File, _ *limits.LimitSet) {
+ for i := 0; i < maxFD; i++ {
+ if _, err := fdTable.NewFDs(ctx, 0, []*fs.File{file}, FDFlags{}); err != nil {
+ t.Fatalf("Allocated %v FDs but wanted to allocate %v", i, maxFD)
+ }
+ }
+
+ if _, err := fdTable.NewFDs(ctx, 0, []*fs.File{file}, FDFlags{}); err == nil {
+ t.Fatalf("fdTable.NewFDs(0, r) in full map: got nil, wanted error")
+ }
+
+ if err := fdTable.NewFDAt(ctx, 1, file, FDFlags{}); err != nil {
+ t.Fatalf("fdTable.NewFDAt(1, r, FDFlags{}): got %v, wanted nil", err)
+ }
+ })
+}
+
+// TestFDTable does a set of simple tests to make sure simple adds, removes,
+// GetRefs, and DecRefs work. The ordering is just weird enough that a
+// table-driven approach seemed clumsy.
+func TestFDTable(t *testing.T) {
+ runTest(t, func(ctx context.Context, fdTable *FDTable, file *fs.File, limitSet *limits.LimitSet) {
+ // Cap the limit at one.
+ limitSet.Set(limits.NumberOfFiles, limits.Limit{1, maxFD}, true)
+
+ if _, err := fdTable.NewFDs(ctx, 0, []*fs.File{file}, FDFlags{}); err != nil {
+ t.Fatalf("Adding an FD to an empty 1-size map: got %v, want nil", err)
+ }
+
+ if _, err := fdTable.NewFDs(ctx, 0, []*fs.File{file}, FDFlags{}); err == nil {
+ t.Fatalf("Adding an FD to a filled 1-size map: got nil, wanted an error")
+ }
+
+ // Remove the previous limit.
+ limitSet.Set(limits.NumberOfFiles, limits.Limit{maxFD, maxFD}, true)
+
+ if fds, err := fdTable.NewFDs(ctx, 0, []*fs.File{file}, FDFlags{}); err != nil {
+ t.Fatalf("Adding an FD to a resized map: got %v, want nil", err)
+ } else if len(fds) != 1 || fds[0] != 1 {
+ t.Fatalf("Added an FD to a resized map: got %v, want {1}", fds)
+ }
+
+ if err := fdTable.NewFDAt(ctx, 1, file, FDFlags{}); err != nil {
+ t.Fatalf("Replacing FD 1 via fdTable.NewFDAt(1, r, FDFlags{}): got %v, wanted nil", err)
+ }
+
+ if err := fdTable.NewFDAt(ctx, maxFD+1, file, FDFlags{}); err == nil {
+ t.Fatalf("Using an FD that was too large via fdTable.NewFDAt(%v, r, FDFlags{}): got nil, wanted an error", maxFD+1)
+ }
+
+ if ref, _ := fdTable.Get(1); ref == nil {
+ t.Fatalf("fdTable.Get(1): got nil, wanted %v", file)
+ }
+
+ if ref, _ := fdTable.Get(2); ref != nil {
+ t.Fatalf("fdTable.Get(2): got a %v, wanted nil", ref)
+ }
+
+ ref := fdTable.Remove(1)
+ if ref == nil {
+ t.Fatalf("fdTable.Remove(1) for an existing FD: failed, want success")
+ }
+ ref.DecRef()
+
+ if ref := fdTable.Remove(1); ref != nil {
+ t.Fatalf("r.Remove(1) for a removed FD: got success, want failure")
+ }
+ })
+}
+
+func TestDescriptorFlags(t *testing.T) {
+ runTest(t, func(ctx context.Context, fdTable *FDTable, file *fs.File, _ *limits.LimitSet) {
+ if err := fdTable.NewFDAt(ctx, 2, file, FDFlags{CloseOnExec: true}); err != nil {
+ t.Fatalf("fdTable.NewFDAt(2, r, FDFlags{}): got %v, wanted nil", err)
+ }
+
+ newFile, flags := fdTable.Get(2)
+ if newFile == nil {
+ t.Fatalf("fdTable.Get(2): got a %v, wanted nil", newFile)
+ }
+
+ if !flags.CloseOnExec {
+ t.Fatalf("new File flags %v don't match original %d\n", flags, 0)
+ }
+ })
+}
+
+func BenchmarkFDLookupAndDecRef(b *testing.B) {
+ b.StopTimer() // Setup.
+
+ runTest(b, func(ctx context.Context, fdTable *FDTable, file *fs.File, _ *limits.LimitSet) {
+ fds, err := fdTable.NewFDs(ctx, 0, []*fs.File{file, file, file, file, file}, FDFlags{})
+ if err != nil {
+ b.Fatalf("fdTable.NewFDs: got %v, wanted nil", err)
+ }
+
+ b.StartTimer() // Benchmark.
+ for i := 0; i < b.N; i++ {
+ tf, _ := fdTable.Get(fds[i%len(fds)])
+ tf.DecRef()
+ }
+ })
+}
+
+func BenchmarkFDLookupAndDecRefConcurrent(b *testing.B) {
+ b.StopTimer() // Setup.
+
+ runTest(b, func(ctx context.Context, fdTable *FDTable, file *fs.File, _ *limits.LimitSet) {
+ fds, err := fdTable.NewFDs(ctx, 0, []*fs.File{file, file, file, file, file}, FDFlags{})
+ if err != nil {
+ b.Fatalf("fdTable.NewFDs: got %v, wanted nil", err)
+ }
+
+ concurrency := runtime.GOMAXPROCS(0)
+ if concurrency < 4 {
+ concurrency = 4
+ }
+ each := b.N / concurrency
+
+ b.StartTimer() // Benchmark.
+ var wg sync.WaitGroup
+ for i := 0; i < concurrency; i++ {
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ for i := 0; i < each; i++ {
+ tf, _ := fdTable.Get(fds[i%len(fds)])
+ tf.DecRef()
+ }
+ }()
+ }
+ wg.Wait()
+ })
+}
diff --git a/pkg/sentry/kernel/fd_table_unsafe.go b/pkg/sentry/kernel/fd_table_unsafe.go
new file mode 100644
index 000000000..e009df974
--- /dev/null
+++ b/pkg/sentry/kernel/fd_table_unsafe.go
@@ -0,0 +1,103 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package kernel
+
+import (
+ "sync/atomic"
+ "unsafe"
+
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+)
+
+type descriptorTable struct {
+ // slice is a *[]unsafe.Pointer, where each element is actually
+ // *descriptor object, updated atomically.
+ //
+ // Changes to the slice itself requiring holding FDTable.mu.
+ slice unsafe.Pointer `state:".(map[int32]*descriptor)"`
+}
+
+// init initializes the table.
+func (f *FDTable) init() {
+ var slice []unsafe.Pointer // Empty slice.
+ atomic.StorePointer(&f.slice, unsafe.Pointer(&slice))
+}
+
+// get gets a file entry.
+//
+// The boolean indicates whether this was in range.
+//
+//go:nosplit
+func (f *FDTable) get(fd int32) (*fs.File, FDFlags, bool) {
+ slice := *(*[]unsafe.Pointer)(atomic.LoadPointer(&f.slice))
+ if fd >= int32(len(slice)) {
+ return nil, FDFlags{}, false
+ }
+ d := (*descriptor)(atomic.LoadPointer(&slice[fd]))
+ if d == nil {
+ return nil, FDFlags{}, true
+ }
+ return d.file, d.flags, true
+}
+
+// set sets an entry.
+//
+// This handles accounting changes, as well as acquiring and releasing the
+// reference needed by the table iff the file is different.
+//
+// Precondition: mu must be held.
+func (f *FDTable) set(fd int32, file *fs.File, flags FDFlags) {
+ slice := *(*[]unsafe.Pointer)(atomic.LoadPointer(&f.slice))
+
+ // Grow the table as required.
+ if last := int32(len(slice)); fd >= last {
+ end := fd + 1
+ if end < 2*last {
+ end = 2 * last
+ }
+ slice = append(slice, make([]unsafe.Pointer, end-last)...)
+ atomic.StorePointer(&f.slice, unsafe.Pointer(&slice))
+ }
+
+ // Create the new element.
+ var d *descriptor
+ if file != nil {
+ d = &descriptor{
+ file: file,
+ flags: flags,
+ }
+ }
+
+ // Update the single element.
+ orig := (*descriptor)(atomic.SwapPointer(&slice[fd], unsafe.Pointer(d)))
+
+ // Acquire a table reference.
+ if file != nil && (orig == nil || file != orig.file) {
+ file.IncRef()
+ }
+
+ // Drop the table reference.
+ if orig != nil && file != orig.file {
+ f.drop(orig.file)
+ }
+
+ // Adjust used.
+ switch {
+ case orig == nil && file != nil:
+ atomic.AddInt32(&f.used, 1)
+ case orig != nil && file == nil:
+ atomic.AddInt32(&f.used, -1)
+ }
+}
diff --git a/pkg/sentry/kernel/fs_context.go b/pkg/sentry/kernel/fs_context.go
index d8115f59a..ded27d668 100644
--- a/pkg/sentry/kernel/fs_context.go
+++ b/pkg/sentry/kernel/fs_context.go
@@ -18,8 +18,8 @@ import (
"fmt"
"sync"
- "gvisor.googlesource.com/gvisor/pkg/refs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/refs"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
)
// FSContext contains filesystem context.
@@ -51,18 +51,20 @@ type FSContext struct {
func newFSContext(root, cwd *fs.Dirent, umask uint) *FSContext {
root.IncRef()
cwd.IncRef()
- return &FSContext{
+ f := FSContext{
root: root,
cwd: cwd,
umask: umask,
}
+ f.EnableLeakCheck("kernel.FSContext")
+ return &f
}
// destroy is the destructor for an FSContext.
//
// This will call DecRef on both root and cwd Dirents. If either call to
-// DecRef returns an error, then it will be propigated. If both calls to
-// DecRef return an error, then the one from root.DecRef will be propigated.
+// DecRef returns an error, then it will be propagated. If both calls to
+// DecRef return an error, then the one from root.DecRef will be propagated.
//
// Note that there may still be calls to WorkingDirectory() or RootDirectory()
// (that return nil). This is because valid references may still be held via
diff --git a/pkg/sentry/kernel/futex/BUILD b/pkg/sentry/kernel/futex/BUILD
index b6af5b20b..6a31dc044 100644
--- a/pkg/sentry/kernel/futex/BUILD
+++ b/pkg/sentry/kernel/futex/BUILD
@@ -5,7 +5,7 @@ load("//tools/go_stateify:defs.bzl", "go_library", "go_test")
go_template_instance(
name = "atomicptr_bucket",
- out = "atomicptr_bucket.go",
+ out = "atomicptr_bucket_unsafe.go",
package = "futex",
suffix = "Bucket",
template = "//third_party/gvsync:generic_atomicptr",
@@ -29,11 +29,11 @@ go_template_instance(
go_library(
name = "futex",
srcs = [
- "atomicptr_bucket.go",
+ "atomicptr_bucket_unsafe.go",
"futex.go",
"waiter_list.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/futex",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/futex",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
diff --git a/pkg/sentry/kernel/futex/futex.go b/pkg/sentry/kernel/futex/futex.go
index bb38eb81e..278cc8143 100644
--- a/pkg/sentry/kernel/futex/futex.go
+++ b/pkg/sentry/kernel/futex/futex.go
@@ -20,10 +20,10 @@ package futex
import (
"sync"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// KeyKind indicates the type of a Key.
@@ -729,14 +729,14 @@ func (m *Manager) UnlockPI(t Target, addr usermem.Addr, tid uint32, private bool
}
b := m.lockBucket(&k)
- err = m.unlockPILocked(t, addr, tid, b)
+ err = m.unlockPILocked(t, addr, tid, b, &k)
k.release()
b.mu.Unlock()
return err
}
-func (m *Manager) unlockPILocked(t Target, addr usermem.Addr, tid uint32, b *bucket) error {
+func (m *Manager) unlockPILocked(t Target, addr usermem.Addr, tid uint32, b *bucket, key *Key) error {
cur, err := t.LoadUint32(addr)
if err != nil {
return err
@@ -746,7 +746,22 @@ func (m *Manager) unlockPILocked(t Target, addr usermem.Addr, tid uint32, b *buc
return syserror.EPERM
}
- if b.waiters.Empty() {
+ var next *Waiter // Who's the next owner?
+ var next2 *Waiter // Who's the one after that?
+ for w := b.waiters.Front(); w != nil; w = w.Next() {
+ if !w.key.matches(key) {
+ continue
+ }
+
+ if next == nil {
+ next = w
+ } else {
+ next2 = w
+ break
+ }
+ }
+
+ if next == nil {
// It's safe to set 0 because there are no waiters, no new owner, and the
// executing task is the current owner (no owner died bit).
prev, err := t.CompareAndSwapUint32(addr, cur, 0)
@@ -761,12 +776,10 @@ func (m *Manager) unlockPILocked(t Target, addr usermem.Addr, tid uint32, b *buc
return nil
}
- next := b.waiters.Front()
-
// Set next owner's TID, waiters if there are any. Resets owner died bit, if
// set, because the executing task takes over as the owner.
val := next.tid
- if next.Next() != nil {
+ if next2 != nil {
val |= linux.FUTEX_WAITERS
}
diff --git a/pkg/sentry/kernel/futex/futex_test.go b/pkg/sentry/kernel/futex/futex_test.go
index 2de5239bf..65e5d1428 100644
--- a/pkg/sentry/kernel/futex/futex_test.go
+++ b/pkg/sentry/kernel/futex/futex_test.go
@@ -23,7 +23,7 @@ import (
"testing"
"unsafe"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// testData implements the Target interface, and allows us to
diff --git a/pkg/sentry/kernel/ipc_namespace.go b/pkg/sentry/kernel/ipc_namespace.go
index ebe12812c..80a070d7e 100644
--- a/pkg/sentry/kernel/ipc_namespace.go
+++ b/pkg/sentry/kernel/ipc_namespace.go
@@ -15,9 +15,9 @@
package kernel
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/semaphore"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/shm"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/semaphore"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/shm"
)
// IPCNamespace represents an IPC namespace.
@@ -40,7 +40,7 @@ func NewIPCNamespace(userNS *auth.UserNamespace) *IPCNamespace {
}
}
-// SemaphoreRegistry returns the semanphore set registry for this namespace.
+// SemaphoreRegistry returns the semaphore set registry for this namespace.
func (i *IPCNamespace) SemaphoreRegistry() *semaphore.Registry {
return i.semaphores
}
diff --git a/pkg/sentry/kernel/kdefs/BUILD b/pkg/sentry/kernel/kdefs/BUILD
deleted file mode 100644
index 38aaca134..000000000
--- a/pkg/sentry/kernel/kdefs/BUILD
+++ /dev/null
@@ -1,10 +0,0 @@
-load("//tools/go_stateify:defs.bzl", "go_library")
-
-package(licenses = ["notice"])
-
-go_library(
- name = "kdefs",
- srcs = ["kdefs.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs",
- visibility = ["//:sandbox"],
-)
diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go
index f253a81d9..38b49cba2 100644
--- a/pkg/sentry/kernel/kernel.go
+++ b/pkg/sentry/kernel/kernel.go
@@ -39,34 +39,34 @@ import (
"sync/atomic"
"time"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/cpuid"
- "gvisor.googlesource.com/gvisor/pkg/eventchannel"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/refs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/timerfd"
- "gvisor.googlesource.com/gvisor/pkg/sentry/hostcpu"
- "gvisor.googlesource.com/gvisor/pkg/sentry/inet"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/epoll"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/futex"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/sched"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/limits"
- "gvisor.googlesource.com/gvisor/pkg/sentry/loader"
- "gvisor.googlesource.com/gvisor/pkg/sentry/mm"
- "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/netlink/port"
- sentrytime "gvisor.googlesource.com/gvisor/pkg/sentry/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/unimpl"
- uspb "gvisor.googlesource.com/gvisor/pkg/sentry/unimpl/unimplemented_syscall_go_proto"
- "gvisor.googlesource.com/gvisor/pkg/sentry/uniqueid"
- "gvisor.googlesource.com/gvisor/pkg/state"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/cpuid"
+ "gvisor.dev/gvisor/pkg/eventchannel"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/refs"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/timerfd"
+ "gvisor.dev/gvisor/pkg/sentry/hostcpu"
+ "gvisor.dev/gvisor/pkg/sentry/inet"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/epoll"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/futex"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/sched"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/limits"
+ "gvisor.dev/gvisor/pkg/sentry/loader"
+ "gvisor.dev/gvisor/pkg/sentry/mm"
+ "gvisor.dev/gvisor/pkg/sentry/pgalloc"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/socket/netlink/port"
+ sentrytime "gvisor.dev/gvisor/pkg/sentry/time"
+ "gvisor.dev/gvisor/pkg/sentry/unimpl"
+ uspb "gvisor.dev/gvisor/pkg/sentry/unimpl/unimplemented_syscall_go_proto"
+ "gvisor.dev/gvisor/pkg/sentry/uniqueid"
+ "gvisor.dev/gvisor/pkg/state"
+ "gvisor.dev/gvisor/pkg/tcpip"
)
// Kernel represents an emulated Linux kernel. It must be initialized by calling
@@ -155,7 +155,7 @@ type Kernel struct {
// cpuClockTicker increments cpuClock.
cpuClockTicker *ktime.Timer `state:"nosave"`
- // fdMapUids is an ever-increasing counter for generating FDMap uids.
+ // fdMapUids is an ever-increasing counter for generating FDTable uids.
//
// fdMapUids is mutable, and is accessed using atomic memory operations.
fdMapUids uint64
@@ -381,13 +381,27 @@ func (k *Kernel) SaveTo(w io.Writer) error {
// flushMountSourceRefs flushes the MountSources for all mounted filesystems
// and open FDs.
func (k *Kernel) flushMountSourceRefs() error {
- // Flush all mount sources for currently mounted filesystems.
+ // Flush all mount sources for currently mounted filesystems in the
+ // root mount namespace.
k.mounts.FlushMountSourceRefs()
+ // Some tasks may have other mount namespaces; flush those as well.
+ flushed := make(map[*fs.MountNamespace]struct{})
+ k.tasks.mu.RLock()
+ k.tasks.forEachThreadGroupLocked(func(tg *ThreadGroup) {
+ if _, ok := flushed[tg.mounts]; ok {
+ // Already flushed.
+ return
+ }
+ tg.mounts.FlushMountSourceRefs()
+ flushed[tg.mounts] = struct{}{}
+ })
+ k.tasks.mu.RUnlock()
+
// There may be some open FDs whose filesystems have been unmounted. We
// must flush those as well.
- return k.tasks.forEachFDPaused(func(desc descriptor) error {
- desc.file.Dirent.Inode.MountSource.FlushDirentRefs()
+ return k.tasks.forEachFDPaused(func(file *fs.File) error {
+ file.Dirent.Inode.MountSource.FlushDirentRefs()
return nil
})
}
@@ -396,35 +410,35 @@ func (k *Kernel) flushMountSourceRefs() error {
// task.
//
// Precondition: Must be called with the kernel paused.
-func (ts *TaskSet) forEachFDPaused(f func(descriptor) error) error {
+func (ts *TaskSet) forEachFDPaused(f func(*fs.File) error) (err error) {
ts.mu.RLock()
defer ts.mu.RUnlock()
for t := range ts.Root.tids {
// We can skip locking Task.mu here since the kernel is paused.
- if t.fds == nil {
+ if t.fdTable == nil {
continue
}
- for _, desc := range t.fds.files {
- if err := f(desc); err != nil {
- return err
+ t.fdTable.forEach(func(_ int32, file *fs.File, _ FDFlags) {
+ if lastErr := f(file); lastErr != nil && err == nil {
+ err = lastErr
}
- }
+ })
}
- return nil
+ return err
}
func (ts *TaskSet) flushWritesToFiles(ctx context.Context) error {
- return ts.forEachFDPaused(func(desc descriptor) error {
- if flags := desc.file.Flags(); !flags.Write {
+ return ts.forEachFDPaused(func(file *fs.File) error {
+ if flags := file.Flags(); !flags.Write {
return nil
}
- if sattr := desc.file.Dirent.Inode.StableAttr; !fs.IsFile(sattr) && !fs.IsDir(sattr) {
+ if sattr := file.Dirent.Inode.StableAttr; !fs.IsFile(sattr) && !fs.IsDir(sattr) {
return nil
}
// Here we need all metadata synced.
- syncErr := desc.file.Fsync(ctx, 0, fs.FileMaxOffset, fs.SyncAll)
+ syncErr := file.Fsync(ctx, 0, fs.FileMaxOffset, fs.SyncAll)
if err := fs.SaveFileFsyncError(syncErr); err != nil {
- name, _ := desc.file.Dirent.FullName(nil /* root */)
+ name, _ := file.Dirent.FullName(nil /* root */)
// Wrap this error in ErrSaveRejection
// so that it will trigger a save
// error, rather than a panic. This
@@ -469,14 +483,12 @@ func (ts *TaskSet) unregisterEpollWaiters() {
defer ts.mu.RUnlock()
for t := range ts.Root.tids {
// We can skip locking Task.mu here since the kernel is paused.
- if fdmap := t.fds; fdmap != nil {
- for _, desc := range fdmap.files {
- if desc.file != nil {
- if e, ok := desc.file.FileOperations.(*epoll.EventPoll); ok {
- e.UnregisterEpollWaiters()
- }
+ if t.fdTable != nil {
+ t.fdTable.forEach(func(_ int32, file *fs.File, _ FDFlags) {
+ if e, ok := file.FileOperations.(*epoll.EventPoll); ok {
+ e.UnregisterEpollWaiters()
}
- }
+ })
}
}
}
@@ -524,6 +536,8 @@ func (k *Kernel) LoadFrom(r io.Reader, net inet.Stack) error {
}
log.Infof("Memory load took [%s].", time.Since(memoryStart))
+ log.Infof("Overall load took [%s]", time.Since(loadStart))
+
// Ensure that all pending asynchronous work is complete:
// - namedpipe opening
// - inode file opening
@@ -588,9 +602,9 @@ type CreateProcessArgs struct {
// Credentials is the initial credentials.
Credentials *auth.Credentials
- // FDMap is the initial set of file descriptors. If CreateProcess succeeds,
- // it takes a reference on FDMap.
- FDMap *FDMap
+ // FDTable is the initial set of file descriptors. If CreateProcess succeeds,
+ // it takes a reference on FDTable.
+ FDTable *FDTable
// Umask is the initial umask.
Umask uint
@@ -611,12 +625,18 @@ type CreateProcessArgs struct {
// AbstractSocketNamespace is the initial Abstract Socket namespace.
AbstractSocketNamespace *AbstractSocketNamespace
+ // MountNamespace optionally contains the mount namespace for this
+ // process. If nil, the kernel's mount namespace is used.
+ //
+ // Anyone setting MountNamespace must donate a reference (i.e.
+ // increment it).
+ MountNamespace *fs.MountNamespace
+
// Root optionally contains the dirent that serves as the root for the
// process. If nil, the mount namespace's root is used as the process'
// root.
//
- // Anyone setting Root must donate a reference (i.e. increment it) to
- // keep it alive until it is decremented by CreateProcess.
+ // Anyone setting Root must donate a reference (i.e. increment it).
Root *fs.Dirent
// ContainerID is the container that the process belongs to.
@@ -659,7 +679,7 @@ func (ctx *createProcessContext) Value(key interface{}) interface{} {
return ctx.args.Credentials
case fs.CtxRoot:
if ctx.args.Root != nil {
- // Take a refernce on the root dirent that will be
+ // Take a reference on the root dirent that will be
// given to the caller.
ctx.args.Root.IncRef()
return ctx.args.Root
@@ -715,20 +735,29 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID,
return nil, 0, fmt.Errorf("no kernel MountNamespace")
}
- tg := k.newThreadGroup(k.tasks.Root, NewSignalHandlers(), linux.SIGCHLD, args.Limits, k.monotonicClock)
+ // Grab the mount namespace.
+ mounts := args.MountNamespace
+ if mounts == nil {
+ // If no MountNamespace was configured, then use the kernel's
+ // root mount namespace, with an extra reference that will be
+ // donated to the task.
+ mounts = k.mounts
+ mounts.IncRef()
+ }
+
+ tg := k.newThreadGroup(mounts, k.tasks.Root, NewSignalHandlers(), linux.SIGCHLD, args.Limits, k.monotonicClock)
ctx := args.NewContext(k)
// Grab the root directory.
root := args.Root
if root == nil {
- root = fs.RootFromContext(ctx)
- // Is the root STILL nil?
- if root == nil {
- return nil, 0, fmt.Errorf("CreateProcessArgs.Root was not provided, and failed to get root from context")
- }
+ // If no Root was configured, then get it from the
+ // MountNamespace.
+ root = mounts.Root()
}
+ // The call to newFSContext below will take a reference on root, so we
+ // don't need to hold this one.
defer root.DecRef()
- args.Root = nil
// Grab the working directory.
remainingTraversals := uint(args.MaxSymlinkTraversals)
@@ -760,9 +789,9 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID,
return nil, 0, errors.New(se.String())
}
- // Take a reference on the FDMap, which will be transferred to
+ // Take a reference on the FDTable, which will be transferred to
// TaskSet.NewTask().
- args.FDMap.IncRef()
+ args.FDTable.IncRef()
// Create the task.
config := &TaskConfig{
@@ -770,7 +799,7 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID,
ThreadGroup: tg,
TaskContext: tc,
FSContext: newFSContext(root, wd, args.Umask),
- FDMap: args.FDMap,
+ FDTable: args.FDTable,
Credentials: args.Credentials,
AllowedCPUMask: sched.NewFullCPUSet(k.applicationCores),
UTSNamespace: args.UTSNamespace,
@@ -842,7 +871,7 @@ func (k *Kernel) pauseTimeLocked() {
}
// By precondition, nothing else can be interacting with PIDNamespace.tids
- // or FDMap.files, so we can iterate them without synchronization. (We
+ // or FDTable.files, so we can iterate them without synchronization. (We
// can't hold the TaskSet mutex when pausing thread group timers because
// thread group timers call ThreadGroup.SendSignal, which takes the TaskSet
// mutex, while holding the Timer mutex.)
@@ -853,14 +882,14 @@ func (k *Kernel) pauseTimeLocked() {
it.PauseTimer()
}
}
- // This means we'll iterate FDMaps shared by multiple tasks repeatedly,
+ // This means we'll iterate FDTables shared by multiple tasks repeatedly,
// but ktime.Timer.Pause is idempotent so this is harmless.
- if fdm := t.fds; fdm != nil {
- for _, desc := range fdm.files {
- if tfd, ok := desc.file.FileOperations.(*timerfd.TimerOperations); ok {
+ if t.fdTable != nil {
+ t.fdTable.forEach(func(_ int32, file *fs.File, _ FDFlags) {
+ if tfd, ok := file.FileOperations.(*timerfd.TimerOperations); ok {
tfd.PauseTimer()
}
- }
+ })
}
}
k.timekeeper.PauseUpdates()
@@ -885,12 +914,12 @@ func (k *Kernel) resumeTimeLocked() {
it.ResumeTimer()
}
}
- if fdm := t.fds; fdm != nil {
- for _, desc := range fdm.files {
- if tfd, ok := desc.file.FileOperations.(*timerfd.TimerOperations); ok {
+ if t.fdTable != nil {
+ t.fdTable.forEach(func(_ int32, file *fs.File, _ FDFlags) {
+ if tfd, ok := file.FileOperations.(*timerfd.TimerOperations); ok {
tfd.ResumeTimer()
}
- }
+ })
}
}
}
diff --git a/pkg/sentry/kernel/kernel_state.go b/pkg/sentry/kernel/kernel_state.go
index 48c3ff5a9..909219086 100644
--- a/pkg/sentry/kernel/kernel_state.go
+++ b/pkg/sentry/kernel/kernel_state.go
@@ -15,8 +15,8 @@
package kernel
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/device"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/sentry/device"
+ "gvisor.dev/gvisor/pkg/tcpip"
)
// saveDanglingEndpoints is invoked by stateify.
diff --git a/pkg/sentry/kernel/memevent/BUILD b/pkg/sentry/kernel/memevent/BUILD
index 347a69062..ebcfaa619 100644
--- a/pkg/sentry/kernel/memevent/BUILD
+++ b/pkg/sentry/kernel/memevent/BUILD
@@ -6,7 +6,7 @@ package(licenses = ["notice"])
go_library(
name = "memevent",
srcs = ["memory_events.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/memevent",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/memevent",
visibility = ["//:sandbox"],
deps = [
":memory_events_go_proto",
@@ -26,7 +26,7 @@ proto_library(
go_proto_library(
name = "memory_events_go_proto",
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/memevent/memory_events_go_proto",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/memevent/memory_events_go_proto",
proto = ":memory_events_proto",
visibility = ["//visibility:public"],
)
diff --git a/pkg/sentry/kernel/memevent/memory_events.go b/pkg/sentry/kernel/memevent/memory_events.go
index 0e2cee807..b0d98e7f0 100644
--- a/pkg/sentry/kernel/memevent/memory_events.go
+++ b/pkg/sentry/kernel/memevent/memory_events.go
@@ -20,12 +20,12 @@ import (
"sync"
"time"
- "gvisor.googlesource.com/gvisor/pkg/eventchannel"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/metric"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- pb "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/memevent/memory_events_go_proto"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/eventchannel"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/metric"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ pb "gvisor.dev/gvisor/pkg/sentry/kernel/memevent/memory_events_go_proto"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
)
var totalTicks = metric.MustCreateNewUint64Metric("/memory_events/ticks", false /*sync*/, "Total number of memory event periods that have elapsed since startup.")
diff --git a/pkg/sentry/kernel/pending_signals.go b/pkg/sentry/kernel/pending_signals.go
index c93f6598a..77a35b788 100644
--- a/pkg/sentry/kernel/pending_signals.go
+++ b/pkg/sentry/kernel/pending_signals.go
@@ -15,9 +15,9 @@
package kernel
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/bits"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/bits"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
)
const (
diff --git a/pkg/sentry/kernel/pending_signals_state.go b/pkg/sentry/kernel/pending_signals_state.go
index 2c902c7e3..ca8b4e164 100644
--- a/pkg/sentry/kernel/pending_signals_state.go
+++ b/pkg/sentry/kernel/pending_signals_state.go
@@ -15,7 +15,7 @@
package kernel
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
)
// +stateify savable
diff --git a/pkg/sentry/kernel/pipe/BUILD b/pkg/sentry/kernel/pipe/BUILD
index b07d15a2a..4d15cca85 100644
--- a/pkg/sentry/kernel/pipe/BUILD
+++ b/pkg/sentry/kernel/pipe/BUILD
@@ -27,7 +27,7 @@ go_library(
"reader_writer.go",
"writer.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/pipe",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/pipe",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
diff --git a/pkg/sentry/kernel/pipe/buffer.go b/pkg/sentry/kernel/pipe/buffer.go
index 4360dc44f..69ef2a720 100644
--- a/pkg/sentry/kernel/pipe/buffer.go
+++ b/pkg/sentry/kernel/pipe/buffer.go
@@ -17,7 +17,7 @@ package pipe
import (
"sync"
- "gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
+ "gvisor.dev/gvisor/pkg/sentry/safemem"
)
// buffer encapsulates a queueable byte buffer.
diff --git a/pkg/sentry/kernel/pipe/buffer_test.go b/pkg/sentry/kernel/pipe/buffer_test.go
index 4b7dbc43f..ee1b90115 100644
--- a/pkg/sentry/kernel/pipe/buffer_test.go
+++ b/pkg/sentry/kernel/pipe/buffer_test.go
@@ -18,7 +18,7 @@ import (
"testing"
"unsafe"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
func TestBufferSize(t *testing.T) {
diff --git a/pkg/sentry/kernel/pipe/device.go b/pkg/sentry/kernel/pipe/device.go
index eb59e15a1..89f5d9342 100644
--- a/pkg/sentry/kernel/pipe/device.go
+++ b/pkg/sentry/kernel/pipe/device.go
@@ -14,7 +14,7 @@
package pipe
-import "gvisor.googlesource.com/gvisor/pkg/sentry/device"
+import "gvisor.dev/gvisor/pkg/sentry/device"
// pipeDevice is used for all pipe files.
var pipeDevice = device.NewAnonDevice()
diff --git a/pkg/sentry/kernel/pipe/node.go b/pkg/sentry/kernel/pipe/node.go
index dc7da529e..a2dc72204 100644
--- a/pkg/sentry/kernel/pipe/node.go
+++ b/pkg/sentry/kernel/pipe/node.go
@@ -17,12 +17,12 @@ package pipe
import (
"sync"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/amutex"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/amutex"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// inodeOperations implements fs.InodeOperations for pipes.
diff --git a/pkg/sentry/kernel/pipe/node_test.go b/pkg/sentry/kernel/pipe/node_test.go
index 9a946b380..adbad7764 100644
--- a/pkg/sentry/kernel/pipe/node_test.go
+++ b/pkg/sentry/kernel/pipe/node_test.go
@@ -18,11 +18,11 @@ import (
"testing"
"time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
type sleeper struct {
@@ -63,7 +63,7 @@ var perms fs.FilePermissions = fs.FilePermissions{
func testOpenOrDie(ctx context.Context, t *testing.T, n fs.InodeOperations, flags fs.FileFlags, doneChan chan<- struct{}) (*fs.File, error) {
inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{Type: fs.Pipe})
- d := fs.NewDirent(inode, "pipe")
+ d := fs.NewDirent(ctx, inode, "pipe")
file, err := n.GetFile(ctx, d, flags)
if err != nil {
t.Fatalf("open with flags %+v failed: %v", flags, err)
@@ -76,7 +76,7 @@ func testOpenOrDie(ctx context.Context, t *testing.T, n fs.InodeOperations, flag
func testOpen(ctx context.Context, t *testing.T, n fs.InodeOperations, flags fs.FileFlags, resChan chan<- openResult) (*fs.File, error) {
inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{Type: fs.Pipe})
- d := fs.NewDirent(inode, "pipe")
+ d := fs.NewDirent(ctx, inode, "pipe")
file, err := n.GetFile(ctx, d, flags)
if resChan != nil {
resChan <- openResult{file, err}
diff --git a/pkg/sentry/kernel/pipe/pipe.go b/pkg/sentry/kernel/pipe/pipe.go
index 73438dc62..247e2928e 100644
--- a/pkg/sentry/kernel/pipe/pipe.go
+++ b/pkg/sentry/kernel/pipe/pipe.go
@@ -21,11 +21,11 @@ import (
"sync/atomic"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/waiter"
)
const (
@@ -39,19 +39,6 @@ const (
MaximumPipeSize = 8 << 20
)
-// Sizer is an interface for setting and getting the size of a pipe.
-//
-// It is implemented by Pipe and, through embedding, all other types.
-type Sizer interface {
- // PipeSize returns the pipe capacity in bytes.
- PipeSize() int64
-
- // SetPipeSize sets the new pipe capacity in bytes.
- //
- // The new size is returned (which may be capped).
- SetPipeSize(int64) (int64, error)
-}
-
// Pipe is an encapsulation of a platform-independent pipe.
// It manages a buffered byte queue shared between a reader/writer
// pair.
@@ -150,8 +137,8 @@ func NewConnectedPipe(ctx context.Context, sizeBytes, atomicIOBytes int64) (*fs.
InodeID: ino,
BlockSize: int64(atomicIOBytes),
}
- ms := fs.NewPseudoMountSource()
- d := fs.NewDirent(fs.NewInode(iops, ms, sattr), fmt.Sprintf("pipe:[%d]", ino))
+ ms := fs.NewPseudoMountSource(ctx)
+ d := fs.NewDirent(ctx, fs.NewInode(ctx, iops, ms, sattr), fmt.Sprintf("pipe:[%d]", ino))
// The p.Open calls below will each take a reference on the Dirent. We
// must drop the one we already have.
defer d.DecRef()
@@ -162,6 +149,7 @@ func NewConnectedPipe(ctx context.Context, sizeBytes, atomicIOBytes int64) (*fs.
//
// Precondition: at least one of flags.Read or flags.Write must be set.
func (p *Pipe) Open(ctx context.Context, d *fs.Dirent, flags fs.FileFlags) *fs.File {
+ flags.NonSeekable = true
switch {
case flags.Read && flags.Write:
p.rOpen()
@@ -398,15 +386,15 @@ func (p *Pipe) queued() int64 {
return p.size
}
-// PipeSize implements PipeSizer.PipeSize.
-func (p *Pipe) PipeSize() int64 {
+// FifoSize implements fs.FifoSizer.FifoSize.
+func (p *Pipe) FifoSize(context.Context, *fs.File) (int64, error) {
p.mu.Lock()
defer p.mu.Unlock()
- return p.max
+ return p.max, nil
}
-// SetPipeSize implements PipeSize.SetPipeSize.
-func (p *Pipe) SetPipeSize(size int64) (int64, error) {
+// SetFifoSize implements fs.FifoSizer.SetFifoSize.
+func (p *Pipe) SetFifoSize(size int64) (int64, error) {
if size < 0 {
return 0, syserror.EINVAL
}
diff --git a/pkg/sentry/kernel/pipe/pipe_test.go b/pkg/sentry/kernel/pipe/pipe_test.go
index 298c6587b..e3a14b665 100644
--- a/pkg/sentry/kernel/pipe/pipe_test.go
+++ b/pkg/sentry/kernel/pipe/pipe_test.go
@@ -18,10 +18,10 @@ import (
"bytes"
"testing"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/waiter"
)
func TestPipeRW(t *testing.T) {
diff --git a/pkg/sentry/kernel/pipe/reader.go b/pkg/sentry/kernel/pipe/reader.go
index 656be824d..7724b4452 100644
--- a/pkg/sentry/kernel/pipe/reader.go
+++ b/pkg/sentry/kernel/pipe/reader.go
@@ -15,7 +15,7 @@
package pipe
import (
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// Reader satisfies the fs.FileOperations interface for read-only pipes.
diff --git a/pkg/sentry/kernel/pipe/reader_writer.go b/pkg/sentry/kernel/pipe/reader_writer.go
index e560b9be9..f69dbf27b 100644
--- a/pkg/sentry/kernel/pipe/reader_writer.go
+++ b/pkg/sentry/kernel/pipe/reader_writer.go
@@ -18,13 +18,13 @@ import (
"math"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// ReaderWriter satisfies the FileOperations interface and services both
@@ -77,7 +77,7 @@ func (rw *ReaderWriter) Readiness(mask waiter.EventMask) waiter.EventMask {
}
// Ioctl implements fs.FileOperations.Ioctl.
-func (rw *ReaderWriter) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
+func (rw *ReaderWriter) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
// Switch on ioctl request.
switch int(args[1].Int()) {
case linux.FIONREAD:
diff --git a/pkg/sentry/kernel/pipe/writer.go b/pkg/sentry/kernel/pipe/writer.go
index 8d5b68541..5bc6aa931 100644
--- a/pkg/sentry/kernel/pipe/writer.go
+++ b/pkg/sentry/kernel/pipe/writer.go
@@ -15,7 +15,7 @@
package pipe
import (
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// Writer satisfies the fs.FileOperations interface for write-only pipes.
diff --git a/pkg/sentry/kernel/posixtimer.go b/pkg/sentry/kernel/posixtimer.go
index a016b4087..c5d095af7 100644
--- a/pkg/sentry/kernel/posixtimer.go
+++ b/pkg/sentry/kernel/posixtimer.go
@@ -17,10 +17,10 @@ package kernel
import (
"math"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// IntervalTimer represents a POSIX interval timer as described by
diff --git a/pkg/sentry/kernel/ptrace.go b/pkg/sentry/kernel/ptrace.go
index 193447b17..3be171cdc 100644
--- a/pkg/sentry/kernel/ptrace.go
+++ b/pkg/sentry/kernel/ptrace.go
@@ -17,11 +17,11 @@ package kernel
import (
"fmt"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/mm"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/mm"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// ptraceOptions are the subset of options controlling a task's ptrace behavior
diff --git a/pkg/sentry/kernel/ptrace_amd64.go b/pkg/sentry/kernel/ptrace_amd64.go
index 048eeaa3f..5514cf432 100644
--- a/pkg/sentry/kernel/ptrace_amd64.go
+++ b/pkg/sentry/kernel/ptrace_amd64.go
@@ -17,9 +17,9 @@
package kernel
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// ptraceArch implements arch-specific ptrace commands.
diff --git a/pkg/sentry/kernel/ptrace_arm64.go b/pkg/sentry/kernel/ptrace_arm64.go
index 4899c813f..0acdf769d 100644
--- a/pkg/sentry/kernel/ptrace_arm64.go
+++ b/pkg/sentry/kernel/ptrace_arm64.go
@@ -17,9 +17,9 @@
package kernel
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// ptraceArch implements arch-specific ptrace commands.
diff --git a/pkg/sentry/kernel/rseq.go b/pkg/sentry/kernel/rseq.go
index c4fb2c56c..24ea002ba 100644
--- a/pkg/sentry/kernel/rseq.go
+++ b/pkg/sentry/kernel/rseq.go
@@ -15,9 +15,9 @@
package kernel
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/hostcpu"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/sentry/hostcpu"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// Restartable sequences, as described in https://lwn.net/Articles/650333/.
diff --git a/pkg/sentry/kernel/sched/BUILD b/pkg/sentry/kernel/sched/BUILD
index 184e8a35b..1725b8562 100644
--- a/pkg/sentry/kernel/sched/BUILD
+++ b/pkg/sentry/kernel/sched/BUILD
@@ -8,7 +8,7 @@ go_library(
"cpuset.go",
"sched.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/sched",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/sched",
visibility = ["//pkg/sentry:internal"],
)
diff --git a/pkg/sentry/kernel/seccomp.go b/pkg/sentry/kernel/seccomp.go
index cc75eb08a..2347dcf36 100644
--- a/pkg/sentry/kernel/seccomp.go
+++ b/pkg/sentry/kernel/seccomp.go
@@ -17,12 +17,12 @@ package kernel
import (
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/binary"
- "gvisor.googlesource.com/gvisor/pkg/bpf"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/bpf"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
const maxSyscallFilterInstructions = 1 << 15
diff --git a/pkg/sentry/kernel/semaphore/BUILD b/pkg/sentry/kernel/semaphore/BUILD
index 840943ca8..36edf10f3 100644
--- a/pkg/sentry/kernel/semaphore/BUILD
+++ b/pkg/sentry/kernel/semaphore/BUILD
@@ -21,7 +21,7 @@ go_library(
"semaphore.go",
"waiter_list.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/semaphore",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/semaphore",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
diff --git a/pkg/sentry/kernel/semaphore/semaphore.go b/pkg/sentry/kernel/semaphore/semaphore.go
index 9d0620e02..93fe68a3e 100644
--- a/pkg/sentry/kernel/semaphore/semaphore.go
+++ b/pkg/sentry/kernel/semaphore/semaphore.go
@@ -19,13 +19,13 @@ import (
"fmt"
"sync"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/syserror"
)
const (
@@ -86,7 +86,7 @@ type Set struct {
dead bool
}
-// sem represents a single semanphore from a set.
+// sem represents a single semaphore from a set.
//
// +stateify savable
type sem struct {
diff --git a/pkg/sentry/kernel/semaphore/semaphore_test.go b/pkg/sentry/kernel/semaphore/semaphore_test.go
index abfcd0fb4..c235f6ca4 100644
--- a/pkg/sentry/kernel/semaphore/semaphore_test.go
+++ b/pkg/sentry/kernel/semaphore/semaphore_test.go
@@ -17,11 +17,11 @@ package semaphore
import (
"testing"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/syserror"
)
func executeOps(ctx context.Context, t *testing.T, set *Set, ops []linux.Sembuf, block bool) chan struct{} {
diff --git a/pkg/sentry/kernel/sessions.go b/pkg/sentry/kernel/sessions.go
index 610e199da..81fcd8258 100644
--- a/pkg/sentry/kernel/sessions.go
+++ b/pkg/sentry/kernel/sessions.go
@@ -15,10 +15,10 @@
package kernel
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/refs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/refs"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// SessionID is the public identifier.
@@ -294,6 +294,7 @@ func (tg *ThreadGroup) createSession() error {
id: SessionID(id),
leader: tg,
}
+ s.refs.EnableLeakCheck("kernel.Session")
// Create a new ProcessGroup, belonging to that Session.
// This also has a single reference (assigned below).
@@ -307,6 +308,7 @@ func (tg *ThreadGroup) createSession() error {
session: s,
ancestors: 0,
}
+ pg.refs.EnableLeakCheck("kernel.ProcessGroup")
// Tie them and return the result.
s.processGroups.PushBack(pg)
@@ -378,11 +380,13 @@ func (tg *ThreadGroup) CreateProcessGroup() error {
// We manually adjust the ancestors if the parent is in the same
// session.
tg.processGroup.session.incRef()
- pg := &ProcessGroup{
+ pg := ProcessGroup{
id: ProcessGroupID(id),
originator: tg,
session: tg.processGroup.session,
}
+ pg.refs.EnableLeakCheck("kernel.ProcessGroup")
+
if tg.leader.parent != nil && tg.leader.parent.tg.processGroup.session == pg.session {
pg.ancestors++
}
@@ -390,20 +394,20 @@ func (tg *ThreadGroup) CreateProcessGroup() error {
// Assign the new process group; adjust children.
oldParentPG := tg.parentPG()
tg.forEachChildThreadGroupLocked(func(childTG *ThreadGroup) {
- childTG.processGroup.incRefWithParent(pg)
+ childTG.processGroup.incRefWithParent(&pg)
childTG.processGroup.decRefWithParent(oldParentPG)
})
tg.processGroup.decRefWithParent(oldParentPG)
- tg.processGroup = pg
+ tg.processGroup = &pg
// Add the new process group to the session.
- pg.session.processGroups.PushBack(pg)
+ pg.session.processGroups.PushBack(&pg)
// Ensure this translation is added to all namespaces.
for ns := tg.pidns; ns != nil; ns = ns.parent {
local := ns.tgids[tg]
- ns.pgids[pg] = ProcessGroupID(local)
- ns.processGroups[ProcessGroupID(local)] = pg
+ ns.pgids[&pg] = ProcessGroupID(local)
+ ns.processGroups[ProcessGroupID(local)] = &pg
}
return nil
diff --git a/pkg/sentry/kernel/shm/BUILD b/pkg/sentry/kernel/shm/BUILD
index bc2089872..aa7471eb6 100644
--- a/pkg/sentry/kernel/shm/BUILD
+++ b/pkg/sentry/kernel/shm/BUILD
@@ -8,7 +8,7 @@ go_library(
"device.go",
"shm.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/shm",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/shm",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
diff --git a/pkg/sentry/kernel/shm/device.go b/pkg/sentry/kernel/shm/device.go
index 3cb759072..6b0d5818b 100644
--- a/pkg/sentry/kernel/shm/device.go
+++ b/pkg/sentry/kernel/shm/device.go
@@ -14,7 +14,7 @@
package shm
-import "gvisor.googlesource.com/gvisor/pkg/sentry/device"
+import "gvisor.dev/gvisor/pkg/sentry/device"
// shmDevice is the kernel shm device.
var shmDevice = device.NewAnonDevice()
diff --git a/pkg/sentry/kernel/shm/shm.go b/pkg/sentry/kernel/shm/shm.go
index 00393b5f0..5bd610f68 100644
--- a/pkg/sentry/kernel/shm/shm.go
+++ b/pkg/sentry/kernel/shm/shm.go
@@ -37,19 +37,19 @@ import (
"fmt"
"sync"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/refs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/refs"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/pgalloc"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// Key represents a shm segment key. Analogous to a file name.
@@ -224,6 +224,7 @@ func (r *Registry) newShm(ctx context.Context, pid int32, key Key, creator fs.Fi
creatorPID: pid,
changeTime: ktime.NowFromContext(ctx),
}
+ shm.EnableLeakCheck("kernel.Shm")
// Find the next available ID.
for id := r.lastIDUsed + 1; id != r.lastIDUsed; id++ {
diff --git a/pkg/sentry/kernel/signal.go b/pkg/sentry/kernel/signal.go
index b528ec0dc..02eede93d 100644
--- a/pkg/sentry/kernel/signal.go
+++ b/pkg/sentry/kernel/signal.go
@@ -17,10 +17,10 @@ package kernel
import (
"fmt"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
)
// SignalPanic is used to panic the running threads. It is a signal which
diff --git a/pkg/sentry/kernel/signal_handlers.go b/pkg/sentry/kernel/signal_handlers.go
index ce8bcb5e5..a16f3d57f 100644
--- a/pkg/sentry/kernel/signal_handlers.go
+++ b/pkg/sentry/kernel/signal_handlers.go
@@ -17,8 +17,8 @@ package kernel
import (
"sync"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
)
// SignalHandlers holds information about signal actions.
diff --git a/pkg/sentry/kernel/syscalls.go b/pkg/sentry/kernel/syscalls.go
index 27cd3728b..220fa73a2 100644
--- a/pkg/sentry/kernel/syscalls.go
+++ b/pkg/sentry/kernel/syscalls.go
@@ -19,10 +19,10 @@ import (
"sync"
"sync/atomic"
- "gvisor.googlesource.com/gvisor/pkg/abi"
- "gvisor.googlesource.com/gvisor/pkg/bits"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/abi"
+ "gvisor.dev/gvisor/pkg/bits"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// maxSyscallNum is the highest supported syscall number.
diff --git a/pkg/sentry/kernel/syslog.go b/pkg/sentry/kernel/syslog.go
index 175d1b247..8227ecf1d 100644
--- a/pkg/sentry/kernel/syslog.go
+++ b/pkg/sentry/kernel/syslog.go
@@ -67,6 +67,7 @@ func (s *syslog) Log() []byte {
"Creating process schedule...",
"Generating random numbers by fair dice roll...",
"Rewriting operating system in Javascript...",
+ "Reticulating splines...",
"Consulting tar man page...",
"Forking spaghetti code...",
"Checking naughty and nice process list...",
diff --git a/pkg/sentry/kernel/table_test.go b/pkg/sentry/kernel/table_test.go
index 3f2b042c8..32cf47e05 100644
--- a/pkg/sentry/kernel/table_test.go
+++ b/pkg/sentry/kernel/table_test.go
@@ -17,8 +17,8 @@ package kernel
import (
"testing"
- "gvisor.googlesource.com/gvisor/pkg/abi"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/abi"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
)
const (
diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go
index 4d889422f..e91f82bb3 100644
--- a/pkg/sentry/kernel/task.go
+++ b/pkg/sentry/kernel/task.go
@@ -18,24 +18,24 @@ import (
"sync"
"sync/atomic"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/bpf"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/inet"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/futex"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/sched"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/limits"
- "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/unimpl"
- "gvisor.googlesource.com/gvisor/pkg/sentry/uniqueid"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/third_party/gvsync"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/bpf"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/inet"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/futex"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/sched"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/limits"
+ "gvisor.dev/gvisor/pkg/sentry/pgalloc"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/unimpl"
+ "gvisor.dev/gvisor/pkg/sentry/uniqueid"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/third_party/gvsync"
)
// Task represents a thread of execution in the untrusted app. It
@@ -236,15 +236,15 @@ type Task struct {
// tc is protected by mu, and is owned by the task goroutine.
tc TaskContext
- // fsc is the task's filesystem context.
+ // fsContext is the task's filesystem context.
//
- // fsc is protected by mu, and is owned by the task goroutine.
- fsc *FSContext
+ // fsContext is protected by mu, and is owned by the task goroutine.
+ fsContext *FSContext
- // fds is the task's file descriptor table.
+ // fdTable is the task's file descriptor table.
//
- // fds is protected by mu, and is owned by the task goroutine.
- fds *FDMap
+ // fdTable is protected by mu, and is owned by the task goroutine.
+ fdTable *FDTable
// If vforkParent is not nil, it is the task that created this task with
// vfork() or clone(CLONE_VFORK), and should have its vforkStop ended when
@@ -386,10 +386,11 @@ type Task struct {
// creds is the task's credentials.
//
- // creds is protected by mu, however the value itself is immutable and can
- // only be changed by a copy. After reading the pointer, access will
- // proceed outside the scope of mu. creds is owned by the task goroutine.
- creds *auth.Credentials
+ // creds.Load() may be called without synchronization. creds.Store() is
+ // serialized by mu. creds is owned by the task goroutine. All
+ // auth.Credentials objects that creds may point to, or have pointed to
+ // in the past, must be treated as immutable.
+ creds auth.AtomicPtrCredentials
// utsns is the task's UTS namespace.
//
@@ -597,11 +598,11 @@ func (t *Task) Value(key interface{}) interface{} {
case CtxTask:
return t
case auth.CtxCredentials:
- return t.creds
+ return t.Credentials()
case context.CtxThreadGroupID:
return int32(t.ThreadGroup().ID())
case fs.CtxRoot:
- return t.fsc.RootDirectory()
+ return t.fsContext.RootDirectory()
case fs.CtxDirentCacheLimiter:
return t.k.DirentCacheLimiter
case inet.CtxStack:
@@ -665,9 +666,9 @@ func (t *Task) SyscallRestartBlock() SyscallRestartBlock {
// Preconditions: The caller must be running on the task goroutine, or t.mu
// must be locked.
func (t *Task) IsChrooted() bool {
- realRoot := t.k.mounts.Root()
+ realRoot := t.tg.mounts.Root()
defer realRoot.DecRef()
- root := t.fsc.RootDirectory()
+ root := t.fsContext.RootDirectory()
if root != nil {
defer root.DecRef()
}
@@ -688,29 +689,68 @@ func (t *Task) TaskContext() *TaskContext {
// Precondition: The caller must be running on the task goroutine, or t.mu must
// be locked.
func (t *Task) FSContext() *FSContext {
- return t.fsc
+ return t.fsContext
}
-// FDMap returns t's FDMap. FDMap does not take an additional reference on the
-// returned FDMap.
+// FDTable returns t's FDTable. FDMTable does not take an additional reference
+// on the returned FDMap.
//
// Precondition: The caller must be running on the task goroutine, or t.mu must
// be locked.
-func (t *Task) FDMap() *FDMap {
- return t.fds
+func (t *Task) FDTable() *FDTable {
+ return t.fdTable
+}
+
+// GetFile is a convenience wrapper t.FDTable().GetFile.
+//
+// Precondition: same as FDTable.
+func (t *Task) GetFile(fd int32) *fs.File {
+ f, _ := t.fdTable.Get(fd)
+ return f
+}
+
+// NewFDs is a convenience wrapper for t.FDTable().NewFDs.
+//
+// This automatically passes the task as the context.
+//
+// Precondition: same as FDTable.
+func (t *Task) NewFDs(fd int32, files []*fs.File, flags FDFlags) ([]int32, error) {
+ return t.fdTable.NewFDs(t, fd, files, flags)
+}
+
+// NewFDFrom is a convenience wrapper for t.FDTable().NewFDs with a single file.
+//
+// This automatically passes the task as the context.
+//
+// Precondition: same as FDTable.
+func (t *Task) NewFDFrom(fd int32, file *fs.File, flags FDFlags) (int32, error) {
+ fds, err := t.fdTable.NewFDs(t, fd, []*fs.File{file}, flags)
+ if err != nil {
+ return 0, err
+ }
+ return fds[0], nil
+}
+
+// NewFDAt is a convenience wrapper for t.FDTable().NewFDAt.
+//
+// This automatically passes the task as the context.
+//
+// Precondition: same as FDTable.
+func (t *Task) NewFDAt(fd int32, file *fs.File, flags FDFlags) error {
+ return t.fdTable.NewFDAt(t, fd, file, flags)
}
// WithMuLocked executes f with t.mu locked.
func (t *Task) WithMuLocked(f func(*Task)) {
t.mu.Lock()
- defer t.mu.Unlock()
f(t)
+ t.mu.Unlock()
}
// MountNamespace returns t's MountNamespace. MountNamespace does not take an
// additional reference on the returned MountNamespace.
func (t *Task) MountNamespace() *fs.MountNamespace {
- return t.k.mounts
+ return t.tg.mounts
}
// AbstractSockets returns t's AbstractSocketNamespace.
diff --git a/pkg/sentry/kernel/task_acct.go b/pkg/sentry/kernel/task_acct.go
index 1ca2a82eb..5f3e60fe8 100644
--- a/pkg/sentry/kernel/task_acct.go
+++ b/pkg/sentry/kernel/task_acct.go
@@ -17,11 +17,11 @@ package kernel
// Accounting, limits, timers.
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/limits"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/limits"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// Getitimer implements getitimer(2).
diff --git a/pkg/sentry/kernel/task_block.go b/pkg/sentry/kernel/task_block.go
index 1c76c4d84..2a2e6f662 100644
--- a/pkg/sentry/kernel/task_block.go
+++ b/pkg/sentry/kernel/task_block.go
@@ -17,8 +17,8 @@ package kernel
import (
"time"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// BlockWithTimeout blocks t until an event is received from C, the application
diff --git a/pkg/sentry/kernel/task_clone.go b/pkg/sentry/kernel/task_clone.go
index bba8ddd39..0916fd658 100644
--- a/pkg/sentry/kernel/task_clone.go
+++ b/pkg/sentry/kernel/task_clone.go
@@ -15,10 +15,10 @@
package kernel
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/bpf"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/bpf"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// SharingOptions controls what resources are shared by a new task created by
@@ -214,20 +214,20 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
}
}
- var fsc *FSContext
+ var fsContext *FSContext
if opts.NewFSContext {
- fsc = t.fsc.Fork()
+ fsContext = t.fsContext.Fork()
} else {
- fsc = t.fsc
- fsc.IncRef()
+ fsContext = t.fsContext
+ fsContext.IncRef()
}
- var fds *FDMap
+ var fdTable *FDTable
if opts.NewFiles {
- fds = t.fds.Fork()
+ fdTable = t.fdTable.Fork()
} else {
- fds = t.fds
- fds.IncRef()
+ fdTable = t.fdTable
+ fdTable.IncRef()
}
pidns := t.tg.pidns
@@ -238,11 +238,12 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
}
tg := t.tg
if opts.NewThreadGroup {
+ tg.mounts.IncRef()
sh := t.tg.signalHandlers
if opts.NewSignalHandlers {
sh = sh.Fork()
}
- tg = t.k.newThreadGroup(pidns, sh, opts.TerminationSignal, tg.limits.GetCopy(), t.k.monotonicClock)
+ tg = t.k.newThreadGroup(tg.mounts, pidns, sh, opts.TerminationSignal, tg.limits.GetCopy(), t.k.monotonicClock)
}
cfg := &TaskConfig{
@@ -250,8 +251,8 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
ThreadGroup: tg,
SignalMask: t.SignalMask(),
TaskContext: tc,
- FSContext: fsc,
- FDMap: fds,
+ FSContext: fsContext,
+ FDTable: fdTable,
Credentials: creds,
Niceness: t.Niceness(),
NetworkNamespaced: t.netns,
@@ -424,6 +425,7 @@ func (t *Task) Unshare(opts *SharingOptions) error {
if opts.NewAddressSpace || opts.NewSignalHandlers {
return syserror.EINVAL
}
+ creds := t.Credentials()
if opts.NewThreadGroup {
t.tg.signalHandlers.mu.Lock()
if t.tg.tasksCount != 1 {
@@ -438,8 +440,6 @@ func (t *Task) Unshare(opts *SharingOptions) error {
if t.IsChrooted() {
return syserror.EPERM
}
- // This temporary is needed because Go.
- creds := t.Credentials()
newUserNS, err := creds.NewChildUserNamespace()
if err != nil {
return err
@@ -448,6 +448,8 @@ func (t *Task) Unshare(opts *SharingOptions) error {
if err != nil {
return err
}
+ // Need to reload creds, becaue t.SetUserNamespace() changed task credentials.
+ creds = t.Credentials()
}
haveCapSysAdmin := t.HasCapability(linux.CAP_SYS_ADMIN)
if opts.NewPIDNamespace {
@@ -472,7 +474,7 @@ func (t *Task) Unshare(opts *SharingOptions) error {
}
// Note that this must happen after NewUserNamespace, so the
// new user namespace is used if there is one.
- t.utsns = t.utsns.Clone(t.creds.UserNamespace)
+ t.utsns = t.utsns.Clone(creds.UserNamespace)
}
if opts.NewIPCNamespace {
if !haveCapSysAdmin {
@@ -481,24 +483,24 @@ func (t *Task) Unshare(opts *SharingOptions) error {
}
// Note that "If CLONE_NEWIPC is set, then create the process in a new IPC
// namespace"
- t.ipcns = NewIPCNamespace(t.creds.UserNamespace)
+ t.ipcns = NewIPCNamespace(creds.UserNamespace)
}
- var oldfds *FDMap
+ var oldFDTable *FDTable
if opts.NewFiles {
- oldfds = t.fds
- t.fds = oldfds.Fork()
+ oldFDTable = t.fdTable
+ t.fdTable = oldFDTable.Fork()
}
- var oldfsc *FSContext
+ var oldFSContext *FSContext
if opts.NewFSContext {
- oldfsc = t.fsc
- t.fsc = oldfsc.Fork()
+ oldFSContext = t.fsContext
+ t.fsContext = oldFSContext.Fork()
}
t.mu.Unlock()
- if oldfds != nil {
- oldfds.DecRef()
+ if oldFDTable != nil {
+ oldFDTable.DecRef()
}
- if oldfsc != nil {
- oldfsc.DecRef()
+ if oldFSContext != nil {
+ oldFSContext.DecRef()
}
return nil
}
diff --git a/pkg/sentry/kernel/task_context.go b/pkg/sentry/kernel/task_context.go
index bbd294141..54b1676b0 100644
--- a/pkg/sentry/kernel/task_context.go
+++ b/pkg/sentry/kernel/task_context.go
@@ -17,16 +17,16 @@ package kernel
import (
"fmt"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/cpuid"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/futex"
- "gvisor.googlesource.com/gvisor/pkg/sentry/loader"
- "gvisor.googlesource.com/gvisor/pkg/sentry/mm"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserr"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/cpuid"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/futex"
+ "gvisor.dev/gvisor/pkg/sentry/loader"
+ "gvisor.dev/gvisor/pkg/sentry/mm"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserr"
)
var errNoSyscalls = syserr.New("no syscall table found", linux.ENOEXEC)
diff --git a/pkg/sentry/kernel/task_exec.go b/pkg/sentry/kernel/task_exec.go
index 35d5cb90c..17a089b90 100644
--- a/pkg/sentry/kernel/task_exec.go
+++ b/pkg/sentry/kernel/task_exec.go
@@ -65,11 +65,11 @@ package kernel
// """
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/mm"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/mm"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// execStop is a TaskStop that a task sets on itself when it wants to execve
@@ -195,7 +195,7 @@ func (r *runSyscallAfterExecStop) execute(t *Task) taskRunState {
t.tg.pidns.owner.mu.Unlock()
// Remove FDs with the CloseOnExec flag set.
- t.fds.RemoveIf(func(file *fs.File, flags FDFlags) bool {
+ t.fdTable.RemoveIf(func(file *fs.File, flags FDFlags) bool {
return flags.CloseOnExec
})
diff --git a/pkg/sentry/kernel/task_exit.go b/pkg/sentry/kernel/task_exit.go
index 158e665d3..535f03e50 100644
--- a/pkg/sentry/kernel/task_exit.go
+++ b/pkg/sentry/kernel/task_exit.go
@@ -29,11 +29,11 @@ import (
"fmt"
"strconv"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// An ExitStatus is a value communicated from an exiting task or thread group
@@ -265,8 +265,8 @@ func (*runExitMain) execute(t *Task) taskRunState {
// Releasing the MM unblocks a blocked CLONE_VFORK parent.
t.unstopVforkParent()
- t.fsc.DecRef()
- t.fds.DecRef()
+ t.fsContext.DecRef()
+ t.fdTable.DecRef()
// If this is the last task to exit from the thread group, release the
// thread group's resources.
diff --git a/pkg/sentry/kernel/task_futex.go b/pkg/sentry/kernel/task_futex.go
index f98097c2c..c211b5b74 100644
--- a/pkg/sentry/kernel/task_futex.go
+++ b/pkg/sentry/kernel/task_futex.go
@@ -15,8 +15,8 @@
package kernel
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/futex"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/futex"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// Futex returns t's futex manager.
@@ -34,14 +34,14 @@ func (t *Task) SwapUint32(addr usermem.Addr, new uint32) (uint32, error) {
})
}
-// CompareAndSwapUint32 implemets futex.Target.CompareAndSwapUint32.
+// CompareAndSwapUint32 implements futex.Target.CompareAndSwapUint32.
func (t *Task) CompareAndSwapUint32(addr usermem.Addr, old, new uint32) (uint32, error) {
return t.MemoryManager().CompareAndSwapUint32(t, addr, old, new, usermem.IOOpts{
AddressSpaceActive: true,
})
}
-// LoadUint32 implemets futex.Target.LoadUint32.
+// LoadUint32 implements futex.Target.LoadUint32.
func (t *Task) LoadUint32(addr usermem.Addr) (uint32, error) {
return t.MemoryManager().LoadUint32(t, addr, usermem.IOOpts{
AddressSpaceActive: true,
diff --git a/pkg/sentry/kernel/task_identity.go b/pkg/sentry/kernel/task_identity.go
index ec95f78d0..78ff14b20 100644
--- a/pkg/sentry/kernel/task_identity.go
+++ b/pkg/sentry/kernel/task_identity.go
@@ -15,40 +15,32 @@
package kernel
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/sentry/mm"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/mm"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// Credentials returns t's credentials.
//
// This value must be considered immutable.
func (t *Task) Credentials() *auth.Credentials {
- t.mu.Lock()
- defer t.mu.Unlock()
- return t.creds
+ return t.creds.Load()
}
// UserNamespace returns the user namespace associated with the task.
func (t *Task) UserNamespace() *auth.UserNamespace {
- t.mu.Lock()
- defer t.mu.Unlock()
- return t.creds.UserNamespace
+ return t.Credentials().UserNamespace
}
// HasCapabilityIn checks if the task has capability cp in user namespace ns.
func (t *Task) HasCapabilityIn(cp linux.Capability, ns *auth.UserNamespace) bool {
- t.mu.Lock()
- defer t.mu.Unlock()
- return t.creds.HasCapabilityIn(cp, ns)
+ return t.Credentials().HasCapabilityIn(cp, ns)
}
// HasCapability checks if the task has capability cp in its user namespace.
func (t *Task) HasCapability(cp linux.Capability) bool {
- t.mu.Lock()
- defer t.mu.Unlock()
- return t.creds.HasCapability(cp)
+ return t.Credentials().HasCapability(cp)
}
// SetUID implements the semantics of setuid(2).
@@ -57,9 +49,12 @@ func (t *Task) SetUID(uid auth.UID) error {
if !uid.Ok() {
return syserror.EINVAL
}
+
t.mu.Lock()
defer t.mu.Unlock()
- kuid := t.creds.UserNamespace.MapToKUID(uid)
+
+ creds := t.Credentials()
+ kuid := creds.UserNamespace.MapToKUID(uid)
if !kuid.Ok() {
return syserror.EINVAL
}
@@ -67,17 +62,17 @@ func (t *Task) SetUID(uid auth.UID) error {
// effective UID of the caller is root (more precisely: if the caller has
// the CAP_SETUID capability), the real UID and saved set-user-ID are also
// set." - setuid(2)
- if t.creds.HasCapability(linux.CAP_SETUID) {
+ if creds.HasCapability(linux.CAP_SETUID) {
t.setKUIDsUncheckedLocked(kuid, kuid, kuid)
return nil
}
// "EPERM: The user is not privileged (Linux: does not have the CAP_SETUID
// capability) and uid does not match the real UID or saved set-user-ID of
// the calling process."
- if kuid != t.creds.RealKUID && kuid != t.creds.SavedKUID {
+ if kuid != creds.RealKUID && kuid != creds.SavedKUID {
return syserror.EPERM
}
- t.setKUIDsUncheckedLocked(t.creds.RealKUID, kuid, t.creds.SavedKUID)
+ t.setKUIDsUncheckedLocked(creds.RealKUID, kuid, creds.SavedKUID)
return nil
}
@@ -87,37 +82,38 @@ func (t *Task) SetREUID(r, e auth.UID) error {
defer t.mu.Unlock()
// "Supplying a value of -1 for either the real or effective user ID forces
// the system to leave that ID unchanged." - setreuid(2)
- newR := t.creds.RealKUID
+ creds := t.Credentials()
+ newR := creds.RealKUID
if r.Ok() {
- newR = t.creds.UserNamespace.MapToKUID(r)
+ newR = creds.UserNamespace.MapToKUID(r)
if !newR.Ok() {
return syserror.EINVAL
}
}
- newE := t.creds.EffectiveKUID
+ newE := creds.EffectiveKUID
if e.Ok() {
- newE = t.creds.UserNamespace.MapToKUID(e)
+ newE = creds.UserNamespace.MapToKUID(e)
if !newE.Ok() {
return syserror.EINVAL
}
}
- if !t.creds.HasCapability(linux.CAP_SETUID) {
+ if !creds.HasCapability(linux.CAP_SETUID) {
// "Unprivileged processes may only set the effective user ID to the
// real user ID, the effective user ID, or the saved set-user-ID."
- if newE != t.creds.RealKUID && newE != t.creds.EffectiveKUID && newE != t.creds.SavedKUID {
+ if newE != creds.RealKUID && newE != creds.EffectiveKUID && newE != creds.SavedKUID {
return syserror.EPERM
}
// "Unprivileged users may only set the real user ID to the real user
// ID or the effective user ID."
- if newR != t.creds.RealKUID && newR != t.creds.EffectiveKUID {
+ if newR != creds.RealKUID && newR != creds.EffectiveKUID {
return syserror.EPERM
}
}
// "If the real user ID is set (i.e., ruid is not -1) or the effective user
// ID is set to a value not equal to the previous real user ID, the saved
// set-user-ID will be set to the new effective user ID."
- newS := t.creds.SavedKUID
- if r.Ok() || (e.Ok() && newE != t.creds.EffectiveKUID) {
+ newS := creds.SavedKUID
+ if r.Ok() || (e.Ok() && newE != creds.EffectiveKUID) {
newS = newE
}
t.setKUIDsUncheckedLocked(newR, newE, newS)
@@ -136,23 +132,24 @@ func (t *Task) SetRESUID(r, e, s auth.UID) error {
// arguments equals -1, the corresponding value is not changed." -
// setresuid(2)
var err error
- newR := t.creds.RealKUID
+ creds := t.Credentials()
+ newR := creds.RealKUID
if r.Ok() {
- newR, err = t.creds.UseUID(r)
+ newR, err = creds.UseUID(r)
if err != nil {
return err
}
}
- newE := t.creds.EffectiveKUID
+ newE := creds.EffectiveKUID
if e.Ok() {
- newE, err = t.creds.UseUID(e)
+ newE, err = creds.UseUID(e)
if err != nil {
return err
}
}
- newS := t.creds.SavedKUID
+ newS := creds.SavedKUID
if s.Ok() {
- newS, err = t.creds.UseUID(s)
+ newS, err = creds.UseUID(s)
if err != nil {
return err
}
@@ -163,10 +160,10 @@ func (t *Task) SetRESUID(r, e, s auth.UID) error {
// Preconditions: t.mu must be locked.
func (t *Task) setKUIDsUncheckedLocked(newR, newE, newS auth.KUID) {
- root := t.creds.UserNamespace.MapToKUID(auth.RootUID)
- oldR, oldE, oldS := t.creds.RealKUID, t.creds.EffectiveKUID, t.creds.SavedKUID
- t.creds = t.creds.Fork() // See doc for creds.
- t.creds.RealKUID, t.creds.EffectiveKUID, t.creds.SavedKUID = newR, newE, newS
+ creds := t.Credentials().Fork() // The credentials object is immutable. See doc for creds.
+ root := creds.UserNamespace.MapToKUID(auth.RootUID)
+ oldR, oldE, oldS := creds.RealKUID, creds.EffectiveKUID, creds.SavedKUID
+ creds.RealKUID, creds.EffectiveKUID, creds.SavedKUID = newR, newE, newS
// "1. If one or more of the real, effective or saved set user IDs was
// previously 0, and as a result of the UID changes all of these IDs have a
@@ -184,9 +181,9 @@ func (t *Task) setKUIDsUncheckedLocked(newR, newE, newS auth.KUID) {
// being cleared." (A thread's effective capability set is always
// cleared when such a credential change is made,
// regardless of the setting of the "keep capabilities" flag.)
- if !t.creds.KeepCaps {
- t.creds.PermittedCaps = 0
- t.creds.EffectiveCaps = 0
+ if !creds.KeepCaps {
+ creds.PermittedCaps = 0
+ creds.EffectiveCaps = 0
}
}
// """
@@ -197,9 +194,9 @@ func (t *Task) setKUIDsUncheckedLocked(newR, newE, newS auth.KUID) {
// permitted set is copied to the effective set.
// """
if oldE == root && newE != root {
- t.creds.EffectiveCaps = 0
+ creds.EffectiveCaps = 0
} else if oldE != root && newE == root {
- t.creds.EffectiveCaps = t.creds.PermittedCaps
+ creds.EffectiveCaps = creds.PermittedCaps
}
// "4. If the filesystem user ID is changed from 0 to nonzero (see
// setfsuid(2)), then the following capabilities are cleared from the
@@ -220,6 +217,7 @@ func (t *Task) setKUIDsUncheckedLocked(newR, newE, newS auth.KUID) {
// Not documented, but compare Linux's kernel/cred.c:commit_creds().
t.parentDeathSignal = 0
}
+ t.creds.Store(creds)
}
// SetGID implements the semantics of setgid(2).
@@ -227,20 +225,23 @@ func (t *Task) SetGID(gid auth.GID) error {
if !gid.Ok() {
return syserror.EINVAL
}
+
t.mu.Lock()
defer t.mu.Unlock()
- kgid := t.creds.UserNamespace.MapToKGID(gid)
+
+ creds := t.Credentials()
+ kgid := creds.UserNamespace.MapToKGID(gid)
if !kgid.Ok() {
return syserror.EINVAL
}
- if t.creds.HasCapability(linux.CAP_SETGID) {
+ if creds.HasCapability(linux.CAP_SETGID) {
t.setKGIDsUncheckedLocked(kgid, kgid, kgid)
return nil
}
- if kgid != t.creds.RealKGID && kgid != t.creds.SavedKGID {
+ if kgid != creds.RealKGID && kgid != creds.SavedKGID {
return syserror.EPERM
}
- t.setKGIDsUncheckedLocked(t.creds.RealKGID, kgid, t.creds.SavedKGID)
+ t.setKGIDsUncheckedLocked(creds.RealKGID, kgid, creds.SavedKGID)
return nil
}
@@ -248,30 +249,32 @@ func (t *Task) SetGID(gid auth.GID) error {
func (t *Task) SetREGID(r, e auth.GID) error {
t.mu.Lock()
defer t.mu.Unlock()
- newR := t.creds.RealKGID
+
+ creds := t.Credentials()
+ newR := creds.RealKGID
if r.Ok() {
- newR = t.creds.UserNamespace.MapToKGID(r)
+ newR = creds.UserNamespace.MapToKGID(r)
if !newR.Ok() {
return syserror.EINVAL
}
}
- newE := t.creds.EffectiveKGID
+ newE := creds.EffectiveKGID
if e.Ok() {
- newE = t.creds.UserNamespace.MapToKGID(e)
+ newE = creds.UserNamespace.MapToKGID(e)
if !newE.Ok() {
return syserror.EINVAL
}
}
- if !t.creds.HasCapability(linux.CAP_SETGID) {
- if newE != t.creds.RealKGID && newE != t.creds.EffectiveKGID && newE != t.creds.SavedKGID {
+ if !creds.HasCapability(linux.CAP_SETGID) {
+ if newE != creds.RealKGID && newE != creds.EffectiveKGID && newE != creds.SavedKGID {
return syserror.EPERM
}
- if newR != t.creds.RealKGID && newR != t.creds.EffectiveKGID {
+ if newR != creds.RealKGID && newR != creds.EffectiveKGID {
return syserror.EPERM
}
}
- newS := t.creds.SavedKGID
- if r.Ok() || (e.Ok() && newE != t.creds.EffectiveKGID) {
+ newS := creds.SavedKGID
+ if r.Ok() || (e.Ok() && newE != creds.EffectiveKGID) {
newS = newE
}
t.setKGIDsUncheckedLocked(newR, newE, newS)
@@ -280,26 +283,29 @@ func (t *Task) SetREGID(r, e auth.GID) error {
// SetRESGID implements the semantics of the setresgid(2) syscall.
func (t *Task) SetRESGID(r, e, s auth.GID) error {
+ var err error
+
t.mu.Lock()
defer t.mu.Unlock()
- var err error
- newR := t.creds.RealKGID
+
+ creds := t.Credentials()
+ newR := creds.RealKGID
if r.Ok() {
- newR, err = t.creds.UseGID(r)
+ newR, err = creds.UseGID(r)
if err != nil {
return err
}
}
- newE := t.creds.EffectiveKGID
+ newE := creds.EffectiveKGID
if e.Ok() {
- newE, err = t.creds.UseGID(e)
+ newE, err = creds.UseGID(e)
if err != nil {
return err
}
}
- newS := t.creds.SavedKGID
+ newS := creds.SavedKGID
if s.Ok() {
- newS, err = t.creds.UseGID(s)
+ newS, err = creds.UseGID(s)
if err != nil {
return err
}
@@ -309,9 +315,9 @@ func (t *Task) SetRESGID(r, e, s auth.GID) error {
}
func (t *Task) setKGIDsUncheckedLocked(newR, newE, newS auth.KGID) {
- oldE := t.creds.EffectiveKGID
- t.creds = t.creds.Fork() // See doc for creds.
- t.creds.RealKGID, t.creds.EffectiveKGID, t.creds.SavedKGID = newR, newE, newS
+ creds := t.Credentials().Fork() // The credentials object is immutable. See doc for creds.
+ oldE := creds.EffectiveKGID
+ creds.RealKGID, creds.EffectiveKGID, creds.SavedKGID = newR, newE, newS
if oldE != newE {
// "[dumpability] is reset to the current value contained in
@@ -327,6 +333,7 @@ func (t *Task) setKGIDsUncheckedLocked(newR, newE, newS auth.KGID) {
// kernel/cred.c:commit_creds().
t.parentDeathSignal = 0
}
+ t.creds.Store(creds)
}
// SetExtraGIDs attempts to change t's supplemental groups. All IDs are
@@ -334,19 +341,21 @@ func (t *Task) setKGIDsUncheckedLocked(newR, newE, newS auth.KGID) {
func (t *Task) SetExtraGIDs(gids []auth.GID) error {
t.mu.Lock()
defer t.mu.Unlock()
- if !t.creds.HasCapability(linux.CAP_SETGID) {
+ creds := t.Credentials()
+ if !creds.HasCapability(linux.CAP_SETGID) {
return syserror.EPERM
}
kgids := make([]auth.KGID, len(gids))
for i, gid := range gids {
- kgid := t.creds.UserNamespace.MapToKGID(gid)
+ kgid := creds.UserNamespace.MapToKGID(gid)
if !kgid.Ok() {
return syserror.EINVAL
}
kgids[i] = kgid
}
- t.creds = t.creds.Fork() // See doc for creds.
- t.creds.ExtraKGIDs = kgids
+ creds = creds.Fork() // The credentials object is immutable. See doc for creds.
+ creds.ExtraKGIDs = kgids
+ t.creds.Store(creds)
return nil
}
@@ -360,27 +369,29 @@ func (t *Task) SetCapabilitySets(permitted, inheritable, effective auth.Capabili
if effective & ^permitted != 0 {
return syserror.EPERM
}
+ creds := t.Credentials()
// "It is also a limiting superset for the capabilities that may be added
// to the inheritable set by a thread that does not have the CAP_SETPCAP
// capability in its effective set."
- if !t.creds.HasCapability(linux.CAP_SETPCAP) && (inheritable & ^(t.creds.InheritableCaps|t.creds.PermittedCaps) != 0) {
+ if !creds.HasCapability(linux.CAP_SETPCAP) && (inheritable & ^(creds.InheritableCaps|creds.PermittedCaps) != 0) {
return syserror.EPERM
}
// "If a thread drops a capability from its permitted set, it can never
// reacquire that capability (unless it execve(2)s ..."
- if permitted & ^t.creds.PermittedCaps != 0 {
+ if permitted & ^creds.PermittedCaps != 0 {
return syserror.EPERM
}
// "... if a capability is not in the bounding set, then a thread can't add
// this capability to its inheritable set, even if it was in its permitted
// capabilities ..."
- if inheritable & ^(t.creds.InheritableCaps|t.creds.BoundingCaps) != 0 {
+ if inheritable & ^(creds.InheritableCaps|creds.BoundingCaps) != 0 {
return syserror.EPERM
}
- t.creds = t.creds.Fork() // See doc for creds.
- t.creds.PermittedCaps = permitted
- t.creds.InheritableCaps = inheritable
- t.creds.EffectiveCaps = effective
+ creds = creds.Fork() // The credentials object is immutable. See doc for creds.
+ creds.PermittedCaps = permitted
+ creds.InheritableCaps = inheritable
+ creds.EffectiveCaps = effective
+ t.creds.Store(creds)
return nil
}
@@ -389,11 +400,13 @@ func (t *Task) SetCapabilitySets(permitted, inheritable, effective auth.Capabili
func (t *Task) DropBoundingCapability(cp linux.Capability) error {
t.mu.Lock()
defer t.mu.Unlock()
- if !t.creds.HasCapability(linux.CAP_SETPCAP) {
+ creds := t.Credentials()
+ if !creds.HasCapability(linux.CAP_SETPCAP) {
return syserror.EPERM
}
- t.creds = t.creds.Fork() // See doc for creds.
- t.creds.BoundingCaps &^= auth.CapabilitySetOf(cp)
+ creds = creds.Fork() // The credentials object is immutable. See doc for creds.
+ creds.BoundingCaps &^= auth.CapabilitySetOf(cp)
+ t.creds.Store(creds)
return nil
}
@@ -402,31 +415,33 @@ func (t *Task) SetUserNamespace(ns *auth.UserNamespace) error {
t.mu.Lock()
defer t.mu.Unlock()
+ creds := t.Credentials()
// "A process reassociating itself with a user namespace must have the
// CAP_SYS_ADMIN capability in the target user namespace." - setns(2)
//
// If t just created ns, then t.creds is guaranteed to have CAP_SYS_ADMIN
// in ns (by rule 3 in auth.Credentials.HasCapability).
- if !t.creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, ns) {
+ if !creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, ns) {
return syserror.EPERM
}
- t.creds = t.creds.Fork() // See doc for creds.
- t.creds.UserNamespace = ns
+ creds = creds.Fork() // The credentials object is immutable. See doc for creds.
+ creds.UserNamespace = ns
// "The child process created by clone(2) with the CLONE_NEWUSER flag
// starts out with a complete set of capabilities in the new user
// namespace. Likewise, a process that creates a new user namespace using
// unshare(2) or joins an existing user namespace using setns(2) gains a
// full set of capabilities in that namespace."
- t.creds.PermittedCaps = auth.AllCapabilities
- t.creds.InheritableCaps = 0
- t.creds.EffectiveCaps = auth.AllCapabilities
- t.creds.BoundingCaps = auth.AllCapabilities
+ creds.PermittedCaps = auth.AllCapabilities
+ creds.InheritableCaps = 0
+ creds.EffectiveCaps = auth.AllCapabilities
+ creds.BoundingCaps = auth.AllCapabilities
// "A call to clone(2), unshare(2), or setns(2) using the CLONE_NEWUSER
// flag sets the "securebits" flags (see capabilities(7)) to their default
// values (all flags disabled) in the child (for clone(2)) or caller (for
// unshare(2), or setns(2)." - user_namespaces(7)
- t.creds.KeepCaps = false
+ creds.KeepCaps = false
+ t.creds.Store(creds)
return nil
}
@@ -435,8 +450,9 @@ func (t *Task) SetUserNamespace(ns *auth.UserNamespace) error {
func (t *Task) SetKeepCaps(k bool) {
t.mu.Lock()
defer t.mu.Unlock()
- t.creds = t.creds.Fork() // See doc for creds.
- t.creds.KeepCaps = k
+ creds := t.Credentials().Fork() // The credentials object is immutable. See doc for creds.
+ creds.KeepCaps = k
+ t.creds.Store(creds)
}
// updateCredsForExec updates t.creds to reflect an execve().
@@ -512,15 +528,16 @@ func (t *Task) updateCredsForExecLocked() {
// the effective user ID.
var newPermitted auth.CapabilitySet // since F(inheritable) == F(permitted) == 0
fileEffective := false
- root := t.creds.UserNamespace.MapToKUID(auth.RootUID)
- if t.creds.EffectiveKUID == root || t.creds.RealKUID == root {
- newPermitted = t.creds.InheritableCaps | t.creds.BoundingCaps
- if t.creds.EffectiveKUID == root {
+ creds := t.Credentials()
+ root := creds.UserNamespace.MapToKUID(auth.RootUID)
+ if creds.EffectiveKUID == root || creds.RealKUID == root {
+ newPermitted = creds.InheritableCaps | creds.BoundingCaps
+ if creds.EffectiveKUID == root {
fileEffective = true
}
}
- t.creds = t.creds.Fork() // See doc for creds.
+ creds = creds.Fork() // The credentials object is immutable. See doc for creds.
// Now we enter poorly-documented, somewhat confusing territory. (The
// accompanying comment in Linux's security/commoncap.c:cap_bprm_set_creds
@@ -562,27 +579,28 @@ func (t *Task) updateCredsForExecLocked() {
// But since no_new_privs is always set (A3 is always true), this becomes
// much simpler. If B1 and B2 are false, C2 is a no-op. If B3 is false, C1
// is a no-op. So we can just do C1 and C2 unconditionally.
- if t.creds.EffectiveKUID != t.creds.RealKUID || t.creds.EffectiveKGID != t.creds.RealKGID {
- t.creds.EffectiveKUID = t.creds.RealKUID
- t.creds.EffectiveKGID = t.creds.RealKGID
+ if creds.EffectiveKUID != creds.RealKUID || creds.EffectiveKGID != creds.RealKGID {
+ creds.EffectiveKUID = creds.RealKUID
+ creds.EffectiveKGID = creds.RealKGID
t.parentDeathSignal = 0
}
// (Saved set-user-ID is always set to the new effective user ID, and saved
// set-group-ID is always set to the new effective group ID, regardless of
// the above.)
- t.creds.SavedKUID = t.creds.RealKUID
- t.creds.SavedKGID = t.creds.RealKGID
- t.creds.PermittedCaps &= newPermitted
+ creds.SavedKUID = creds.RealKUID
+ creds.SavedKGID = creds.RealKGID
+ creds.PermittedCaps &= newPermitted
if fileEffective {
- t.creds.EffectiveCaps = t.creds.PermittedCaps
+ creds.EffectiveCaps = creds.PermittedCaps
} else {
- t.creds.EffectiveCaps = 0
+ creds.EffectiveCaps = 0
}
// prctl(2): The "keep capabilities" value will be reset to 0 on subsequent
// calls to execve(2).
- t.creds.KeepCaps = false
+ creds.KeepCaps = false
// "The bounding set is inherited at fork(2) from the thread's parent, and
// is preserved across an execve(2)". So we're done.
+ t.creds.Store(creds)
}
diff --git a/pkg/sentry/kernel/task_log.go b/pkg/sentry/kernel/task_log.go
index e0e57e8bd..a29e9b9eb 100644
--- a/pkg/sentry/kernel/task_log.go
+++ b/pkg/sentry/kernel/task_log.go
@@ -18,8 +18,8 @@ import (
"fmt"
"sort"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
const (
@@ -63,7 +63,7 @@ func (t *Task) DebugDumpState() {
if mm := t.MemoryManager(); mm != nil {
t.Debugf("Mappings:\n%s", mm)
}
- t.Debugf("FDMap:\n%s", t.fds)
+ t.Debugf("FDTable:\n%s", t.fdTable)
}
// debugDumpRegisters logs register state at log level debug.
diff --git a/pkg/sentry/kernel/task_net.go b/pkg/sentry/kernel/task_net.go
index 04c684c1a..172a31e1d 100644
--- a/pkg/sentry/kernel/task_net.go
+++ b/pkg/sentry/kernel/task_net.go
@@ -15,7 +15,7 @@
package kernel
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/inet"
+ "gvisor.dev/gvisor/pkg/sentry/inet"
)
// IsNetworkNamespaced returns true if t is in a non-root network namespace.
diff --git a/pkg/sentry/kernel/task_run.go b/pkg/sentry/kernel/task_run.go
index a79101a18..c92266c59 100644
--- a/pkg/sentry/kernel/task_run.go
+++ b/pkg/sentry/kernel/task_run.go
@@ -19,13 +19,13 @@ import (
"runtime"
"sync/atomic"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/hostcpu"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/hostcpu"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// A taskRunState is a reified state in the task state machine. See README.md
diff --git a/pkg/sentry/kernel/task_sched.go b/pkg/sentry/kernel/task_sched.go
index 1c94ab11b..e76c069b0 100644
--- a/pkg/sentry/kernel/task_sched.go
+++ b/pkg/sentry/kernel/task_sched.go
@@ -22,13 +22,13 @@ import (
"sync/atomic"
"time"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/hostcpu"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/sched"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/limits"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/hostcpu"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/sched"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/limits"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// TaskGoroutineState is a coarse representation of the current execution
diff --git a/pkg/sentry/kernel/task_signals.go b/pkg/sentry/kernel/task_signals.go
index 654cf7525..266959a07 100644
--- a/pkg/sentry/kernel/task_signals.go
+++ b/pkg/sentry/kernel/task_signals.go
@@ -21,13 +21,13 @@ import (
"sync/atomic"
"time"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/eventchannel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- ucspb "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/uncaught_signal_go_proto"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/eventchannel"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ ucspb "gvisor.dev/gvisor/pkg/sentry/kernel/uncaught_signal_go_proto"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// SignalAction is an internal signal action.
diff --git a/pkg/sentry/kernel/task_start.go b/pkg/sentry/kernel/task_start.go
index b42531e57..a88bf3951 100644
--- a/pkg/sentry/kernel/task_start.go
+++ b/pkg/sentry/kernel/task_start.go
@@ -15,13 +15,13 @@
package kernel
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/futex"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/sched"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/futex"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/sched"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// TaskConfig defines the configuration of a new Task (see below).
@@ -52,9 +52,10 @@ type TaskConfig struct {
// succeeds.
FSContext *FSContext
- // FDMap is the FDMap of the new task. A reference must be held on FDMap,
- // which is transferred to TaskSet.NewTask whether or not it succeeds.
- FDMap *FDMap
+ // FDTable is the FDTableof the new task. A reference must be held on
+ // FDMap, which is transferred to TaskSet.NewTask whether or not it
+ // succeeds.
+ FDTable *FDTable
// Credentials is the Credentials of the new task.
Credentials *auth.Credentials
@@ -90,7 +91,7 @@ func (ts *TaskSet) NewTask(cfg *TaskConfig) (*Task, error) {
if err != nil {
cfg.TaskContext.release()
cfg.FSContext.DecRef()
- cfg.FDMap.DecRef()
+ cfg.FDTable.DecRef()
return nil, err
}
return t, nil
@@ -112,14 +113,13 @@ func (ts *TaskSet) newTask(cfg *TaskConfig) (*Task, error) {
signalMask: cfg.SignalMask,
signalStack: arch.SignalStack{Flags: arch.SignalStackFlagDisable},
tc: *tc,
- fsc: cfg.FSContext,
- fds: cfg.FDMap,
+ fsContext: cfg.FSContext,
+ fdTable: cfg.FDTable,
p: cfg.Kernel.Platform.NewContext(),
k: cfg.Kernel,
ptraceTracees: make(map[*Task]struct{}),
allowedCPUMask: cfg.AllowedCPUMask.Copy(),
ioUsage: &usage.IO{},
- creds: cfg.Credentials,
niceness: cfg.Niceness,
netns: cfg.NetworkNamespaced,
utsns: cfg.UTSNamespace,
@@ -129,6 +129,7 @@ func (ts *TaskSet) newTask(cfg *TaskConfig) (*Task, error) {
futexWaiter: futex.NewWaiter(),
containerID: cfg.ContainerID,
}
+ t.creds.Store(cfg.Credentials)
t.endStopCond.L = &t.tg.signalHandlers.mu
t.ptraceTracer.Store((*Task)(nil))
// We don't construct t.blockingTimer until Task.run(); see that function
diff --git a/pkg/sentry/kernel/task_stop.go b/pkg/sentry/kernel/task_stop.go
index e735a5dd0..10c6e455c 100644
--- a/pkg/sentry/kernel/task_stop.go
+++ b/pkg/sentry/kernel/task_stop.go
@@ -172,7 +172,7 @@ func (t *Task) beginStopLocked() {
}
}
-// endStopLocked decerements t.stopCount to indicate that an existing internal
+// endStopLocked decrements t.stopCount to indicate that an existing internal
// or external stop no longer applies to t.
//
// Preconditions: The signal mutex must be locked.
diff --git a/pkg/sentry/kernel/task_syscall.go b/pkg/sentry/kernel/task_syscall.go
index a9283d0df..b543d536a 100644
--- a/pkg/sentry/kernel/task_syscall.go
+++ b/pkg/sentry/kernel/task_syscall.go
@@ -19,13 +19,13 @@ import (
"os"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/bits"
- "gvisor.googlesource.com/gvisor/pkg/metric"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/bits"
+ "gvisor.dev/gvisor/pkg/metric"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// SyscallRestartErrno represents a ERESTART* errno defined in the Linux's kernel
diff --git a/pkg/sentry/kernel/task_test.go b/pkg/sentry/kernel/task_test.go
index b895361d0..cfcde9a7a 100644
--- a/pkg/sentry/kernel/task_test.go
+++ b/pkg/sentry/kernel/task_test.go
@@ -17,7 +17,7 @@ package kernel
import (
"testing"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/sched"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/sched"
)
func TestTaskCPU(t *testing.T) {
diff --git a/pkg/sentry/kernel/task_usermem.go b/pkg/sentry/kernel/task_usermem.go
index 461bd7316..518bfe1bd 100644
--- a/pkg/sentry/kernel/task_usermem.go
+++ b/pkg/sentry/kernel/task_usermem.go
@@ -17,9 +17,9 @@ package kernel
import (
"math"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// MAX_RW_COUNT is the maximum size in bytes of a single read or write.
diff --git a/pkg/sentry/kernel/thread_group.go b/pkg/sentry/kernel/thread_group.go
index 8bd53928e..2a97e3e8e 100644
--- a/pkg/sentry/kernel/thread_group.go
+++ b/pkg/sentry/kernel/thread_group.go
@@ -18,10 +18,11 @@ import (
"sync"
"sync/atomic"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/limits"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/limits"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
)
// A ThreadGroup is a logical grouping of tasks that has widespread
@@ -236,13 +237,21 @@ type ThreadGroup struct {
// rscr is the thread group's RSEQ critical region.
rscr atomic.Value `state:".(*RSEQCriticalRegion)"`
+
+ // mounts is the thread group's mount namespace. This does not really
+ // correspond to a "mount namespace" in Linux, but is more like a
+ // complete VFS that need not be shared between processes. See the
+ // comment in mounts.go for more information.
+ //
+ // mounts is immutable.
+ mounts *fs.MountNamespace
}
// newThreadGroup returns a new, empty thread group in PID namespace ns. The
// thread group leader will send its parent terminationSignal when it exits.
// The new thread group isn't visible to the system until a task has been
// created inside of it by a successful call to TaskSet.NewTask.
-func (k *Kernel) newThreadGroup(ns *PIDNamespace, sh *SignalHandlers, terminationSignal linux.Signal, limits *limits.LimitSet, monotonicClock *timekeeperClock) *ThreadGroup {
+func (k *Kernel) newThreadGroup(mounts *fs.MountNamespace, ns *PIDNamespace, sh *SignalHandlers, terminationSignal linux.Signal, limits *limits.LimitSet, monotonicClock *timekeeperClock) *ThreadGroup {
tg := &ThreadGroup{
threadGroupNode: threadGroupNode{
pidns: ns,
@@ -251,6 +260,7 @@ func (k *Kernel) newThreadGroup(ns *PIDNamespace, sh *SignalHandlers, terminatio
terminationSignal: terminationSignal,
ioUsage: &usage.IO{},
limits: limits,
+ mounts: mounts,
}
tg.itimerRealTimer = ktime.NewTimer(k.monotonicClock, &itimerRealListener{tg: tg})
tg.timers = make(map[linux.TimerID]*IntervalTimer)
@@ -258,7 +268,7 @@ func (k *Kernel) newThreadGroup(ns *PIDNamespace, sh *SignalHandlers, terminatio
return tg
}
-// saveRscr is invopked by stateify.
+// saveRscr is invoked by stateify.
func (tg *ThreadGroup) saveRscr() *RSEQCriticalRegion {
return tg.rscr.Load().(*RSEQCriticalRegion)
}
@@ -298,6 +308,7 @@ func (tg *ThreadGroup) release() {
for _, it := range its {
it.DestroyTimer()
}
+ tg.mounts.DecRef()
}
// forEachChildThreadGroupLocked indicates over all child ThreadGroups.
diff --git a/pkg/sentry/kernel/threads.go b/pkg/sentry/kernel/threads.go
index 656bbd46c..b21b182fc 100644
--- a/pkg/sentry/kernel/threads.go
+++ b/pkg/sentry/kernel/threads.go
@@ -18,8 +18,8 @@ import (
"fmt"
"sync"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// TasksLimit is the maximum number of threads for untrusted application.
diff --git a/pkg/sentry/kernel/time/BUILD b/pkg/sentry/kernel/time/BUILD
index 584f7c7cc..9beae4b31 100644
--- a/pkg/sentry/kernel/time/BUILD
+++ b/pkg/sentry/kernel/time/BUILD
@@ -8,7 +8,7 @@ go_library(
"context.go",
"time.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/time",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
diff --git a/pkg/sentry/kernel/time/context.go b/pkg/sentry/kernel/time/context.go
index c0660d362..8ef483dd3 100644
--- a/pkg/sentry/kernel/time/context.go
+++ b/pkg/sentry/kernel/time/context.go
@@ -15,7 +15,7 @@
package time
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/context"
)
// contextID is the time package's type for context.Context.Value keys.
diff --git a/pkg/sentry/kernel/time/time.go b/pkg/sentry/kernel/time/time.go
index 3846cf1ea..aa6c75d25 100644
--- a/pkg/sentry/kernel/time/time.go
+++ b/pkg/sentry/kernel/time/time.go
@@ -22,9 +22,9 @@ import (
"sync"
"time"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// Events that may be generated by a Clock.
@@ -142,6 +142,11 @@ func (t Time) Timeval() linux.Timeval {
return linux.NsecToTimeval(t.Nanoseconds())
}
+// StatxTimestamp converts Time to a Linux statx_timestamp.
+func (t Time) StatxTimestamp() linux.StatxTimestamp {
+ return linux.NsecToStatxTimestamp(t.Nanoseconds())
+}
+
// Add adds the duration of d to t.
func (t Time) Add(d time.Duration) Time {
if t.ns > 0 && d.Nanoseconds() > math.MaxInt64-int64(t.ns) {
diff --git a/pkg/sentry/kernel/timekeeper.go b/pkg/sentry/kernel/timekeeper.go
index 505a4fa4f..76417342a 100644
--- a/pkg/sentry/kernel/timekeeper.go
+++ b/pkg/sentry/kernel/timekeeper.go
@@ -19,11 +19,11 @@ import (
"sync"
"time"
- "gvisor.googlesource.com/gvisor/pkg/log"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- sentrytime "gvisor.googlesource.com/gvisor/pkg/sentry/time"
+ "gvisor.dev/gvisor/pkg/log"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/pgalloc"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ sentrytime "gvisor.dev/gvisor/pkg/sentry/time"
)
// Timekeeper manages all of the kernel clocks.
@@ -122,7 +122,7 @@ func (t *Timekeeper) SetClocks(c sentrytime.Clocks) {
//
// In a restored sentry, monotonic time jumps forward by approximately
// the same amount as real time. There are no guarantees here, we are
- // just making a best-effort attempt to to make it appear that the app
+ // just making a best-effort attempt to make it appear that the app
// was simply not scheduled for a long period, rather than that the
// real time clock was changed.
//
diff --git a/pkg/sentry/kernel/timekeeper_state.go b/pkg/sentry/kernel/timekeeper_state.go
index 6ce358a05..8e961c832 100644
--- a/pkg/sentry/kernel/timekeeper_state.go
+++ b/pkg/sentry/kernel/timekeeper_state.go
@@ -15,7 +15,7 @@
package kernel
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/time"
+ "gvisor.dev/gvisor/pkg/sentry/time"
)
// beforeSave is invoked by stateify.
diff --git a/pkg/sentry/kernel/timekeeper_test.go b/pkg/sentry/kernel/timekeeper_test.go
index a92ad689e..849c5b646 100644
--- a/pkg/sentry/kernel/timekeeper_test.go
+++ b/pkg/sentry/kernel/timekeeper_test.go
@@ -17,12 +17,12 @@ package kernel
import (
"testing"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest"
- "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc"
- sentrytime "gvisor.googlesource.com/gvisor/pkg/sentry/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/pgalloc"
+ sentrytime "gvisor.dev/gvisor/pkg/sentry/time"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// mockClocks is a sentrytime.Clocks that simply returns the times in the
diff --git a/pkg/sentry/kernel/uts_namespace.go b/pkg/sentry/kernel/uts_namespace.go
index 96fe3cbb9..0a563e715 100644
--- a/pkg/sentry/kernel/uts_namespace.go
+++ b/pkg/sentry/kernel/uts_namespace.go
@@ -17,7 +17,7 @@ package kernel
import (
"sync"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
)
// UTSNamespace represents a UTS namespace, a holder of two system identifiers:
diff --git a/pkg/sentry/kernel/vdso.go b/pkg/sentry/kernel/vdso.go
index d40ad74f4..fdd10c56c 100644
--- a/pkg/sentry/kernel/vdso.go
+++ b/pkg/sentry/kernel/vdso.go
@@ -17,11 +17,11 @@ package kernel
import (
"fmt"
- "gvisor.googlesource.com/gvisor/pkg/binary"
- "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/sentry/pgalloc"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/safemem"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// vdsoParams are the parameters exposed to the VDSO.
diff --git a/pkg/sentry/limits/BUILD b/pkg/sentry/limits/BUILD
index 800166675..40025d62d 100644
--- a/pkg/sentry/limits/BUILD
+++ b/pkg/sentry/limits/BUILD
@@ -9,7 +9,7 @@ go_library(
"limits.go",
"linux.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/limits",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/limits",
visibility = ["//:sandbox"],
deps = [
"//pkg/abi/linux",
diff --git a/pkg/sentry/limits/context.go b/pkg/sentry/limits/context.go
index 9200edb52..6972749ed 100644
--- a/pkg/sentry/limits/context.go
+++ b/pkg/sentry/limits/context.go
@@ -15,7 +15,7 @@
package limits
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/context"
)
// contextID is the limit package's type for context.Context.Value keys.
diff --git a/pkg/sentry/limits/linux.go b/pkg/sentry/limits/linux.go
index a2b401e3d..3f71abecc 100644
--- a/pkg/sentry/limits/linux.go
+++ b/pkg/sentry/limits/linux.go
@@ -17,7 +17,7 @@ package limits
import (
"fmt"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/abi/linux"
)
// FromLinuxResource maps linux resources to sentry LimitTypes.
diff --git a/pkg/sentry/loader/BUILD b/pkg/sentry/loader/BUILD
index 66300f25a..3b322f5f3 100644
--- a/pkg/sentry/loader/BUILD
+++ b/pkg/sentry/loader/BUILD
@@ -21,7 +21,7 @@ go_library(
"vdso_state.go",
":vdso_bin",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/loader",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/loader",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi",
diff --git a/pkg/sentry/loader/elf.go b/pkg/sentry/loader/elf.go
index 900236531..fba2f27fe 100644
--- a/pkg/sentry/loader/elf.go
+++ b/pkg/sentry/loader/elf.go
@@ -20,19 +20,19 @@ import (
"fmt"
"io"
- "gvisor.googlesource.com/gvisor/pkg/abi"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/binary"
- "gvisor.googlesource.com/gvisor/pkg/cpuid"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/limits"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/mm"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/cpuid"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/limits"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/mm"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
const (
diff --git a/pkg/sentry/loader/interpreter.go b/pkg/sentry/loader/interpreter.go
index b88062ae5..ccf909cac 100644
--- a/pkg/sentry/loader/interpreter.go
+++ b/pkg/sentry/loader/interpreter.go
@@ -18,10 +18,10 @@ import (
"bytes"
"io"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
const (
diff --git a/pkg/sentry/loader/loader.go b/pkg/sentry/loader/loader.go
index dc1a52398..baa12d9a0 100644
--- a/pkg/sentry/loader/loader.go
+++ b/pkg/sentry/loader/loader.go
@@ -21,18 +21,18 @@ import (
"io"
"path"
- "gvisor.googlesource.com/gvisor/pkg/abi"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/cpuid"
- "gvisor.googlesource.com/gvisor/pkg/rand"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/sentry/mm"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserr"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/cpuid"
+ "gvisor.dev/gvisor/pkg/rand"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/mm"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserr"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// readFull behaves like io.ReadFull for an *fs.File.
@@ -54,7 +54,7 @@ func readFull(ctx context.Context, f *fs.File, dst usermem.IOSequence, offset in
// openPath opens name for loading.
//
// openPath returns the fs.Dirent and an *fs.File for name, which is not
-// installed in the Task FDMap. The caller takes ownership of both.
+// installed in the Task FDTable. The caller takes ownership of both.
//
// name must be a readable, executable, regular file.
func openPath(ctx context.Context, mm *fs.MountNamespace, root, wd *fs.Dirent, maxTraversals *uint, name string) (*fs.Dirent, *fs.File, error) {
diff --git a/pkg/sentry/loader/vdso.go b/pkg/sentry/loader/vdso.go
index 4e73527cf..ada28aea3 100644
--- a/pkg/sentry/loader/vdso.go
+++ b/pkg/sentry/loader/vdso.go
@@ -19,22 +19,22 @@ import (
"fmt"
"io"
- "gvisor.googlesource.com/gvisor/pkg/abi"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/anon"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/mm"
- "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc"
- "gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
- "gvisor.googlesource.com/gvisor/pkg/sentry/uniqueid"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/anon"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/mm"
+ "gvisor.dev/gvisor/pkg/sentry/pgalloc"
+ "gvisor.dev/gvisor/pkg/sentry/safemem"
+ "gvisor.dev/gvisor/pkg/sentry/uniqueid"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/waiter"
)
type fileContext struct {
@@ -69,11 +69,12 @@ type byteReader struct {
var _ fs.FileOperations = (*byteReader)(nil)
// newByteReaderFile creates a fake file to read data from.
-func newByteReaderFile(data []byte) *fs.File {
+func newByteReaderFile(ctx context.Context, data []byte) *fs.File {
// Create a fake inode.
inode := fs.NewInode(
+ ctx,
&fsutil.SimpleFileInode{},
- fs.NewPseudoMountSource(),
+ fs.NewPseudoMountSource(ctx),
fs.StableAttr{
Type: fs.Anonymous,
DeviceID: anon.PseudoDevice.DeviceID(),
@@ -219,8 +220,8 @@ type VDSO struct {
// PrepareVDSO validates the system VDSO and returns a VDSO, containing the
// param page for updating by the kernel.
-func PrepareVDSO(mfp pgalloc.MemoryFileProvider) (*VDSO, error) {
- vdsoFile := newByteReaderFile(vdsoBin)
+func PrepareVDSO(ctx context.Context, mfp pgalloc.MemoryFileProvider) (*VDSO, error) {
+ vdsoFile := newByteReaderFile(ctx, vdsoBin)
// First make sure the VDSO is valid. vdsoFile does not use ctx, so a
// nil context can be passed.
diff --git a/pkg/sentry/memmap/BUILD b/pkg/sentry/memmap/BUILD
index 9c2cbd18b..29c14ec56 100644
--- a/pkg/sentry/memmap/BUILD
+++ b/pkg/sentry/memmap/BUILD
@@ -36,7 +36,7 @@ go_library(
"mapping_set_impl.go",
"memmap.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/memmap",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/memmap",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/log",
diff --git a/pkg/sentry/memmap/mapping_set.go b/pkg/sentry/memmap/mapping_set.go
index 3cf2b338f..0a5b7ce45 100644
--- a/pkg/sentry/memmap/mapping_set.go
+++ b/pkg/sentry/memmap/mapping_set.go
@@ -18,7 +18,7 @@ import (
"fmt"
"math"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// MappingSet maps offsets into a Mappable to mappings of those offsets. It is
@@ -85,7 +85,7 @@ func (mappingSetFunctions) Merge(r1 MappableRange, val1 MappingsOfRange, r2 Mapp
// Each MappingOfRange in val1 must have a matching region in val2, forming
// one contiguous region.
for k1 := range val1 {
- // We expect val2 to to contain a key that forms a contiguous
+ // We expect val2 to contain a key that forms a contiguous
// region with k1.
k2 := MappingOfRange{
MappingSpace: k1.MappingSpace,
diff --git a/pkg/sentry/memmap/mapping_set_test.go b/pkg/sentry/memmap/mapping_set_test.go
index c702555ce..f9b11a59c 100644
--- a/pkg/sentry/memmap/mapping_set_test.go
+++ b/pkg/sentry/memmap/mapping_set_test.go
@@ -18,7 +18,7 @@ import (
"reflect"
"testing"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
type testMappingSpace struct {
diff --git a/pkg/sentry/memmap/memmap.go b/pkg/sentry/memmap/memmap.go
index 0106c857d..03b99aaea 100644
--- a/pkg/sentry/memmap/memmap.go
+++ b/pkg/sentry/memmap/memmap.go
@@ -18,10 +18,10 @@ package memmap
import (
"fmt"
- "gvisor.googlesource.com/gvisor/pkg/refs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/refs"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// Mappable represents a memory-mappable object, a mutable mapping from uint64
diff --git a/pkg/sentry/mm/BUILD b/pkg/sentry/mm/BUILD
index c78cb4280..072745a08 100644
--- a/pkg/sentry/mm/BUILD
+++ b/pkg/sentry/mm/BUILD
@@ -7,7 +7,7 @@ go_template_instance(
name = "file_refcount_set",
out = "file_refcount_set.go",
imports = {
- "platform": "gvisor.googlesource.com/gvisor/pkg/sentry/platform",
+ "platform": "gvisor.dev/gvisor/pkg/sentry/platform",
},
package = "mm",
prefix = "fileRefcount",
@@ -27,7 +27,7 @@ go_template_instance(
"minDegree": "8",
},
imports = {
- "usermem": "gvisor.googlesource.com/gvisor/pkg/sentry/usermem",
+ "usermem": "gvisor.dev/gvisor/pkg/sentry/usermem",
},
package = "mm",
prefix = "vma",
@@ -47,7 +47,7 @@ go_template_instance(
"minDegree": "8",
},
imports = {
- "usermem": "gvisor.googlesource.com/gvisor/pkg/sentry/usermem",
+ "usermem": "gvisor.dev/gvisor/pkg/sentry/usermem",
},
package = "mm",
prefix = "pma",
@@ -95,7 +95,7 @@ go_library(
"vma.go",
"vma_set.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/mm",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/mm",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
diff --git a/pkg/sentry/mm/README.md b/pkg/sentry/mm/README.md
index e6efbf565..e1322e373 100644
--- a/pkg/sentry/mm/README.md
+++ b/pkg/sentry/mm/README.md
@@ -274,7 +274,7 @@ In the sentry:
methods
[`platform.AddressSpace.MapFile` and `platform.AddressSpace.Unmap`][platform].
-[memmap]: https://gvisor.googlesource.com/gvisor/+/master/pkg/sentry/memmap/memmap.go
-[mm]: https://gvisor.googlesource.com/gvisor/+/master/pkg/sentry/mm/mm.go
-[pgalloc]: https://gvisor.googlesource.com/gvisor/+/master/pkg/sentry/pgalloc/pgalloc.go
-[platform]: https://gvisor.googlesource.com/gvisor/+/master/pkg/sentry/platform/platform.go
+[memmap]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/memmap/memmap.go
+[mm]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/mm/mm.go
+[pgalloc]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/pgalloc/pgalloc.go
+[platform]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/platform/platform.go
diff --git a/pkg/sentry/mm/address_space.go b/pkg/sentry/mm/address_space.go
index 06f587fde..cfebcfd42 100644
--- a/pkg/sentry/mm/address_space.go
+++ b/pkg/sentry/mm/address_space.go
@@ -18,9 +18,9 @@ import (
"fmt"
"sync/atomic"
- "gvisor.googlesource.com/gvisor/pkg/atomicbitops"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/atomicbitops"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// AddressSpace returns the platform.AddressSpace bound to mm.
diff --git a/pkg/sentry/mm/aio_context.go b/pkg/sentry/mm/aio_context.go
index 5c61acf36..1b746d030 100644
--- a/pkg/sentry/mm/aio_context.go
+++ b/pkg/sentry/mm/aio_context.go
@@ -17,15 +17,15 @@ package mm
import (
"sync"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/refs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/refs"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/pgalloc"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// aioManager creates and manages asynchronous I/O contexts.
@@ -213,7 +213,9 @@ func newAIOMappable(mfp pgalloc.MemoryFileProvider) (*aioMappable, error) {
if err != nil {
return nil, err
}
- return &aioMappable{mfp: mfp, fr: fr}, nil
+ m := aioMappable{mfp: mfp, fr: fr}
+ m.EnableLeakCheck("mm.aioMappable")
+ return &m, nil
}
// DecRef implements refs.RefCounter.DecRef.
diff --git a/pkg/sentry/mm/debug.go b/pkg/sentry/mm/debug.go
index fe58cfc4c..df9adf708 100644
--- a/pkg/sentry/mm/debug.go
+++ b/pkg/sentry/mm/debug.go
@@ -18,7 +18,7 @@ import (
"bytes"
"fmt"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/context"
)
const (
diff --git a/pkg/sentry/mm/io.go b/pkg/sentry/mm/io.go
index e4c057d28..b03e7d020 100644
--- a/pkg/sentry/mm/io.go
+++ b/pkg/sentry/mm/io.go
@@ -15,11 +15,11 @@
package mm
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/safemem"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// There are two supported ways to copy data to/from application virtual
diff --git a/pkg/sentry/mm/lifecycle.go b/pkg/sentry/mm/lifecycle.go
index 7646d5ab2..4e9ca1de6 100644
--- a/pkg/sentry/mm/lifecycle.go
+++ b/pkg/sentry/mm/lifecycle.go
@@ -18,14 +18,14 @@ import (
"fmt"
"sync/atomic"
- "gvisor.googlesource.com/gvisor/pkg/atomicbitops"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/limits"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/atomicbitops"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/limits"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/pgalloc"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// NewMemoryManager returns a new MemoryManager with no mappings and 1 user.
@@ -86,10 +86,22 @@ func (mm *MemoryManager) Fork(ctx context.Context) (*MemoryManager, error) {
}
// Copy vmas.
+ dontforks := false
dstvgap := mm2.vmas.FirstGap()
for srcvseg := mm.vmas.FirstSegment(); srcvseg.Ok(); srcvseg = srcvseg.NextSegment() {
vma := srcvseg.Value() // makes a copy of the vma
vmaAR := srcvseg.Range()
+
+ if vma.dontfork {
+ length := uint64(vmaAR.Length())
+ mm2.usageAS -= length
+ if vma.isPrivateDataLocked() {
+ mm2.dataAS -= length
+ }
+ dontforks = true
+ continue
+ }
+
// Inform the Mappable, if any, of the new mapping.
if vma.mappable != nil {
if err := vma.mappable.AddMapping(ctx, mm2, vmaAR, vma.off, vma.canWriteMappableLocked()); err != nil {
@@ -118,6 +130,10 @@ func (mm *MemoryManager) Fork(ctx context.Context) (*MemoryManager, error) {
defer mm2.activeMu.Unlock()
mm.activeMu.Lock()
defer mm.activeMu.Unlock()
+ if dontforks {
+ defer mm.pmas.MergeRange(mm.applicationAddrRange())
+ }
+ srcvseg := mm.vmas.FirstSegment()
dstpgap := mm2.pmas.FirstGap()
var unmapAR usermem.AddrRange
for srcpseg := mm.pmas.FirstSegment(); srcpseg.Ok(); srcpseg = srcpseg.NextSegment() {
@@ -125,6 +141,27 @@ func (mm *MemoryManager) Fork(ctx context.Context) (*MemoryManager, error) {
if !pma.private {
continue
}
+
+ if dontforks {
+ // Find the 'vma' that contains the starting address
+ // associated with the 'pma' (there must be one).
+ srcvseg = srcvseg.seekNextLowerBound(srcpseg.Start())
+ if checkInvariants {
+ if !srcvseg.Ok() {
+ panic(fmt.Sprintf("no vma covers pma range %v", srcpseg.Range()))
+ }
+ if srcpseg.Start() < srcvseg.Start() {
+ panic(fmt.Sprintf("vma %v ran ahead of pma %v", srcvseg.Range(), srcpseg.Range()))
+ }
+ }
+
+ srcpseg = mm.pmas.Isolate(srcpseg, srcvseg.Range())
+ if srcvseg.ValuePtr().dontfork {
+ continue
+ }
+ pma = srcpseg.ValuePtr()
+ }
+
if !pma.needCOW {
pma.needCOW = true
if pma.effectivePerms.Write {
diff --git a/pkg/sentry/mm/metadata.go b/pkg/sentry/mm/metadata.go
index c218006ee..d2a01d48a 100644
--- a/pkg/sentry/mm/metadata.go
+++ b/pkg/sentry/mm/metadata.go
@@ -15,9 +15,9 @@
package mm
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// Dumpability describes if and how core dumps should be created.
diff --git a/pkg/sentry/mm/mm.go b/pkg/sentry/mm/mm.go
index 604866d04..f350e0109 100644
--- a/pkg/sentry/mm/mm.go
+++ b/pkg/sentry/mm/mm.go
@@ -37,14 +37,14 @@ package mm
import (
"sync"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/third_party/gvsync"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/pgalloc"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/safemem"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/third_party/gvsync"
)
// MemoryManager implements a virtual address space.
@@ -74,7 +74,7 @@ type MemoryManager struct {
// privateRefs is immutable.
privateRefs *privateRefs
- // users is the number of dependences on the mappings in the MemoryManager.
+ // users is the number of dependencies on the mappings in the MemoryManager.
// When the number of references in users reaches zero, all mappings are
// unmapped.
//
@@ -274,6 +274,9 @@ type vma struct {
// metag, none of which we currently support.
growsDown bool `state:"manual"`
+ // dontfork is the MADV_DONTFORK setting for this vma configured by madvise().
+ dontfork bool
+
mlockMode memmap.MLockMode
// numaPolicy is the NUMA policy for this vma set by mbind().
diff --git a/pkg/sentry/mm/mm_test.go b/pkg/sentry/mm/mm_test.go
index 7209c73ce..4d2bfaaed 100644
--- a/pkg/sentry/mm/mm_test.go
+++ b/pkg/sentry/mm/mm_test.go
@@ -17,15 +17,15 @@ package mm
import (
"testing"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest"
- "gvisor.googlesource.com/gvisor/pkg/sentry/limits"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/limits"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/pgalloc"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
func testMemoryManager(ctx context.Context) *MemoryManager {
diff --git a/pkg/sentry/mm/pma.go b/pkg/sentry/mm/pma.go
index ece561ff0..c976c6f45 100644
--- a/pkg/sentry/mm/pma.go
+++ b/pkg/sentry/mm/pma.go
@@ -17,14 +17,14 @@ package mm
import (
"fmt"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/safecopy"
- "gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/platform/safecopy"
+ "gvisor.dev/gvisor/pkg/sentry/safemem"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// existingPMAsLocked checks that pmas exist for all addresses in ar, and
diff --git a/pkg/sentry/mm/procfs.go b/pkg/sentry/mm/procfs.go
index c8302a553..a8819aa84 100644
--- a/pkg/sentry/mm/procfs.go
+++ b/pkg/sentry/mm/procfs.go
@@ -19,10 +19,10 @@ import (
"fmt"
"strings"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
const (
diff --git a/pkg/sentry/mm/save_restore.go b/pkg/sentry/mm/save_restore.go
index 0385957bd..93259c5a3 100644
--- a/pkg/sentry/mm/save_restore.go
+++ b/pkg/sentry/mm/save_restore.go
@@ -17,7 +17,7 @@ package mm
import (
"fmt"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/context"
)
// InvalidateUnsavable invokes memmap.Mappable.InvalidateUnsavable on all
diff --git a/pkg/sentry/mm/shm.go b/pkg/sentry/mm/shm.go
index 12913007b..b9f2d23e5 100644
--- a/pkg/sentry/mm/shm.go
+++ b/pkg/sentry/mm/shm.go
@@ -15,10 +15,10 @@
package mm
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/shm"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/shm"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// DetachShm unmaps a sysv shared memory segment.
diff --git a/pkg/sentry/mm/special_mappable.go b/pkg/sentry/mm/special_mappable.go
index 687959005..ea2d7af74 100644
--- a/pkg/sentry/mm/special_mappable.go
+++ b/pkg/sentry/mm/special_mappable.go
@@ -15,14 +15,14 @@
package mm
import (
- "gvisor.googlesource.com/gvisor/pkg/refs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/refs"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/pgalloc"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// SpecialMappable implements memmap.MappingIdentity and memmap.Mappable with
@@ -45,7 +45,9 @@ type SpecialMappable struct {
//
// Preconditions: fr.Length() != 0.
func NewSpecialMappable(name string, mfp pgalloc.MemoryFileProvider, fr platform.FileRange) *SpecialMappable {
- return &SpecialMappable{mfp: mfp, fr: fr, name: name}
+ m := SpecialMappable{mfp: mfp, fr: fr, name: name}
+ m.EnableLeakCheck("mm.SpecialMappable")
+ return &m
}
// DecRef implements refs.RefCounter.DecRef.
diff --git a/pkg/sentry/mm/syscalls.go b/pkg/sentry/mm/syscalls.go
index 9cf136532..c2466c988 100644
--- a/pkg/sentry/mm/syscalls.go
+++ b/pkg/sentry/mm/syscalls.go
@@ -18,15 +18,15 @@ import (
"fmt"
mrand "math/rand"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/futex"
- "gvisor.googlesource.com/gvisor/pkg/sentry/limits"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/futex"
+ "gvisor.dev/gvisor/pkg/sentry/limits"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/pgalloc"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// HandleUserFault handles an application page fault. sp is the faulting
@@ -1026,6 +1026,32 @@ func (mm *MemoryManager) SetNumaPolicy(addr usermem.Addr, length uint64, policy
}
}
+// SetDontFork implements the semantics of madvise MADV_DONTFORK.
+func (mm *MemoryManager) SetDontFork(addr usermem.Addr, length uint64, dontfork bool) error {
+ ar, ok := addr.ToRange(length)
+ if !ok {
+ return syserror.EINVAL
+ }
+
+ mm.mappingMu.Lock()
+ defer mm.mappingMu.Unlock()
+ defer func() {
+ mm.vmas.MergeRange(ar)
+ mm.vmas.MergeAdjacent(ar)
+ }()
+
+ for vseg := mm.vmas.LowerBoundSegment(ar.Start); vseg.Ok() && vseg.Start() < ar.End; vseg = vseg.NextSegment() {
+ vseg = mm.vmas.Isolate(vseg, ar)
+ vma := vseg.ValuePtr()
+ vma.dontfork = dontfork
+ }
+
+ if mm.vmas.SpanRange(ar) != ar.Length() {
+ return syserror.ENOMEM
+ }
+ return nil
+}
+
// Decommit implements the semantics of Linux's madvise(MADV_DONTNEED).
func (mm *MemoryManager) Decommit(addr usermem.Addr, length uint64) error {
ar, ok := addr.ToRange(length)
diff --git a/pkg/sentry/mm/vma.go b/pkg/sentry/mm/vma.go
index 0af8de5b0..f2fd70799 100644
--- a/pkg/sentry/mm/vma.go
+++ b/pkg/sentry/mm/vma.go
@@ -17,14 +17,14 @@ package mm
import (
"fmt"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/sentry/limits"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/limits"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// Preconditions: mm.mappingMu must be locked for writing. opts must be valid
@@ -34,7 +34,7 @@ func (mm *MemoryManager) createVMALocked(ctx context.Context, opts memmap.MMapOp
panic(fmt.Sprintf("Non-effective MaxPerms %s cannot be enforced", opts.MaxPerms))
}
- // Find a useable range.
+ // Find a usable range.
addr, err := mm.findAvailableLocked(opts.Length, findAvailableOpts{
Addr: opts.Addr,
Fixed: opts.Fixed,
@@ -439,6 +439,7 @@ func (vmaSetFunctions) Merge(ar1 usermem.AddrRange, vma1 vma, ar2 usermem.AddrRa
vma1.mlockMode != vma2.mlockMode ||
vma1.numaPolicy != vma2.numaPolicy ||
vma1.numaNodemask != vma2.numaNodemask ||
+ vma1.dontfork != vma2.dontfork ||
vma1.id != vma2.id ||
vma1.hint != vma2.hint {
return vma{}, false
diff --git a/pkg/sentry/pgalloc/BUILD b/pkg/sentry/pgalloc/BUILD
index ca2d5ba6f..858f895f2 100644
--- a/pkg/sentry/pgalloc/BUILD
+++ b/pkg/sentry/pgalloc/BUILD
@@ -35,7 +35,7 @@ go_template_instance(
"minDegree": "10",
},
imports = {
- "platform": "gvisor.googlesource.com/gvisor/pkg/sentry/platform",
+ "platform": "gvisor.dev/gvisor/pkg/sentry/platform",
},
package = "pgalloc",
prefix = "usage",
@@ -59,7 +59,7 @@ go_library(
"save_restore.go",
"usage_set.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/pgalloc",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/log",
diff --git a/pkg/sentry/pgalloc/context.go b/pkg/sentry/pgalloc/context.go
index cb9809b1f..11ccf897b 100644
--- a/pkg/sentry/pgalloc/context.go
+++ b/pkg/sentry/pgalloc/context.go
@@ -15,7 +15,7 @@
package pgalloc
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/context"
)
// contextID is this package's type for context.Context.Value keys.
diff --git a/pkg/sentry/pgalloc/pgalloc.go b/pkg/sentry/pgalloc/pgalloc.go
index 6d91f1a7b..8bd3e885d 100644
--- a/pkg/sentry/pgalloc/pgalloc.go
+++ b/pkg/sentry/pgalloc/pgalloc.go
@@ -30,14 +30,14 @@ import (
"syscall"
"time"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/hostmm"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/hostmm"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/safemem"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// MemoryFile is a platform.File whose pages may be allocated to arbitrary
diff --git a/pkg/sentry/pgalloc/pgalloc_test.go b/pkg/sentry/pgalloc/pgalloc_test.go
index 14a39bb9e..428e6a859 100644
--- a/pkg/sentry/pgalloc/pgalloc_test.go
+++ b/pkg/sentry/pgalloc/pgalloc_test.go
@@ -17,7 +17,7 @@ package pgalloc
import (
"testing"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
const (
diff --git a/pkg/sentry/pgalloc/save_restore.go b/pkg/sentry/pgalloc/save_restore.go
index d4ba384b1..1effc7735 100644
--- a/pkg/sentry/pgalloc/save_restore.go
+++ b/pkg/sentry/pgalloc/save_restore.go
@@ -22,10 +22,10 @@ import (
"sync/atomic"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/state"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/state"
)
// SaveTo writes f's state to the given stream.
diff --git a/pkg/sentry/platform/BUILD b/pkg/sentry/platform/BUILD
index ac8a6cb7f..9aa6ec507 100644
--- a/pkg/sentry/platform/BUILD
+++ b/pkg/sentry/platform/BUILD
@@ -22,12 +22,13 @@ go_library(
"mmap_min_addr.go",
"platform.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/platform",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/platform",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
"//pkg/atomicbitops",
"//pkg/log",
+ "//pkg/seccomp",
"//pkg/sentry/arch",
"//pkg/sentry/context",
"//pkg/sentry/platform/safecopy",
diff --git a/pkg/sentry/platform/context.go b/pkg/sentry/platform/context.go
index 793f57fd7..e29bc4485 100644
--- a/pkg/sentry/platform/context.go
+++ b/pkg/sentry/platform/context.go
@@ -15,7 +15,7 @@
package platform
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/context"
)
// contextID is the auth package's type for context.Context.Value keys.
diff --git a/pkg/sentry/platform/interrupt/BUILD b/pkg/sentry/platform/interrupt/BUILD
index eeccd4d0e..eeb634644 100644
--- a/pkg/sentry/platform/interrupt/BUILD
+++ b/pkg/sentry/platform/interrupt/BUILD
@@ -7,7 +7,7 @@ go_library(
srcs = [
"interrupt.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/platform/interrupt",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/platform/interrupt",
visibility = ["//pkg/sentry:internal"],
)
diff --git a/pkg/sentry/platform/kvm/BUILD b/pkg/sentry/platform/kvm/BUILD
index 2931d6ddc..ad8b95744 100644
--- a/pkg/sentry/platform/kvm/BUILD
+++ b/pkg/sentry/platform/kvm/BUILD
@@ -14,6 +14,7 @@ go_library(
"bluepill_fault.go",
"bluepill_unsafe.go",
"context.go",
+ "filters.go",
"kvm.go",
"kvm_amd64.go",
"kvm_amd64_unsafe.go",
@@ -25,7 +26,7 @@ go_library(
"physical_map.go",
"virtual_map.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/platform/kvm",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/platform/kvm",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
@@ -33,6 +34,7 @@ go_library(
"//pkg/cpuid",
"//pkg/log",
"//pkg/procid",
+ "//pkg/seccomp",
"//pkg/sentry/arch",
"//pkg/sentry/platform",
"//pkg/sentry/platform/interrupt",
diff --git a/pkg/sentry/platform/kvm/address_space.go b/pkg/sentry/platform/kvm/address_space.go
index 689122175..acd41f73d 100644
--- a/pkg/sentry/platform/kvm/address_space.go
+++ b/pkg/sentry/platform/kvm/address_space.go
@@ -18,10 +18,10 @@ import (
"sync"
"sync/atomic"
- "gvisor.googlesource.com/gvisor/pkg/atomicbitops"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0/pagetables"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/atomicbitops"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// dirtySet tracks vCPUs for invalidation.
diff --git a/pkg/sentry/platform/kvm/allocator.go b/pkg/sentry/platform/kvm/allocator.go
index 42bcc9733..80942e9c9 100644
--- a/pkg/sentry/platform/kvm/allocator.go
+++ b/pkg/sentry/platform/kvm/allocator.go
@@ -17,7 +17,7 @@ package kvm
import (
"fmt"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0/pagetables"
+ "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
)
type allocator struct {
diff --git a/pkg/sentry/platform/kvm/bluepill.go b/pkg/sentry/platform/kvm/bluepill.go
index a926e6f8b..043de51b3 100644
--- a/pkg/sentry/platform/kvm/bluepill.go
+++ b/pkg/sentry/platform/kvm/bluepill.go
@@ -19,8 +19,8 @@ import (
"reflect"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/safecopy"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/platform/safecopy"
)
// bluepill enters guest mode.
diff --git a/pkg/sentry/platform/kvm/bluepill_amd64.go b/pkg/sentry/platform/kvm/bluepill_amd64.go
index c258408f9..421c88220 100644
--- a/pkg/sentry/platform/kvm/bluepill_amd64.go
+++ b/pkg/sentry/platform/kvm/bluepill_amd64.go
@@ -19,8 +19,8 @@ package kvm
import (
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/platform/ring0"
)
var (
diff --git a/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go b/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go
index 92fde7ee0..9d8af143e 100644
--- a/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go
+++ b/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go
@@ -19,8 +19,8 @@ package kvm
import (
"unsafe"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/platform/ring0"
)
// bluepillArchContext returns the arch-specific context.
@@ -30,7 +30,7 @@ func bluepillArchContext(context unsafe.Pointer) *arch.SignalContext64 {
return &((*arch.UContext64)(context).MContext)
}
-// dieArchSetup initialies the state for dieTrampoline.
+// dieArchSetup initializes the state for dieTrampoline.
//
// The amd64 dieTrampoline requires the vCPU to be set in BX, and the last RIP
// to be in AX. The trampoline then simulates a call to dieHandler from the
diff --git a/pkg/sentry/platform/kvm/bluepill_fault.go b/pkg/sentry/platform/kvm/bluepill_fault.go
index 3c452f5ba..b97476053 100644
--- a/pkg/sentry/platform/kvm/bluepill_fault.go
+++ b/pkg/sentry/platform/kvm/bluepill_fault.go
@@ -18,7 +18,7 @@ import (
"sync/atomic"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
const (
diff --git a/pkg/sentry/platform/kvm/context.go b/pkg/sentry/platform/kvm/context.go
index 0eb0020f7..99450d22d 100644
--- a/pkg/sentry/platform/kvm/context.go
+++ b/pkg/sentry/platform/kvm/context.go
@@ -15,11 +15,11 @@
package kvm
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/interrupt"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/platform/interrupt"
+ "gvisor.dev/gvisor/pkg/sentry/platform/ring0"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// context is an implementation of the platform context.
diff --git a/pkg/sentry/platform/kvm/filters.go b/pkg/sentry/platform/kvm/filters.go
new file mode 100644
index 000000000..7d949f1dd
--- /dev/null
+++ b/pkg/sentry/platform/kvm/filters.go
@@ -0,0 +1,33 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package kvm
+
+import (
+ "syscall"
+
+ "gvisor.dev/gvisor/pkg/seccomp"
+)
+
+// SyscallFilters returns syscalls made exclusively by the KVM platform.
+func (*KVM) SyscallFilters() seccomp.SyscallRules {
+ return seccomp.SyscallRules{
+ syscall.SYS_ARCH_PRCTL: {},
+ syscall.SYS_IOCTL: {},
+ syscall.SYS_MMAP: {},
+ syscall.SYS_RT_SIGSUSPEND: {},
+ syscall.SYS_RT_SIGTIMEDWAIT: {},
+ 0xffffffffffffffff: {}, // KVM uses syscall -1 to transition to host.
+ }
+}
diff --git a/pkg/sentry/platform/kvm/kvm.go b/pkg/sentry/platform/kvm/kvm.go
index ed0521c3f..ee4cd2f4d 100644
--- a/pkg/sentry/platform/kvm/kvm.go
+++ b/pkg/sentry/platform/kvm/kvm.go
@@ -21,11 +21,11 @@ import (
"sync"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/cpuid"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0/pagetables"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/cpuid"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/platform/ring0"
+ "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// KVM represents a lightweight VM context.
@@ -141,3 +141,17 @@ func (k *KVM) NewContext() platform.Context {
machine: k.machine,
}
}
+
+type constructor struct{}
+
+func (*constructor) New(f *os.File) (platform.Platform, error) {
+ return New(f)
+}
+
+func (*constructor) OpenDevice() (*os.File, error) {
+ return OpenDevice()
+}
+
+func init() {
+ platform.Register("kvm", &constructor{})
+}
diff --git a/pkg/sentry/platform/kvm/kvm_amd64.go b/pkg/sentry/platform/kvm/kvm_amd64.go
index 61493ccaf..5d8ef4761 100644
--- a/pkg/sentry/platform/kvm/kvm_amd64.go
+++ b/pkg/sentry/platform/kvm/kvm_amd64.go
@@ -17,7 +17,7 @@
package kvm
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0"
+ "gvisor.dev/gvisor/pkg/sentry/platform/ring0"
)
// userMemoryRegion is a region of physical memory.
diff --git a/pkg/sentry/platform/kvm/kvm_test.go b/pkg/sentry/platform/kvm/kvm_test.go
index e83db71e9..30df725d4 100644
--- a/pkg/sentry/platform/kvm/kvm_test.go
+++ b/pkg/sentry/platform/kvm/kvm_test.go
@@ -22,12 +22,12 @@ import (
"testing"
"time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/kvm/testutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0/pagetables"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/platform/kvm/testutil"
+ "gvisor.dev/gvisor/pkg/sentry/platform/ring0"
+ "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
var dummyFPState = (*byte)(arch.NewFloatingPointData())
diff --git a/pkg/sentry/platform/kvm/machine.go b/pkg/sentry/platform/kvm/machine.go
index f8ccd86af..679087e25 100644
--- a/pkg/sentry/platform/kvm/machine.go
+++ b/pkg/sentry/platform/kvm/machine.go
@@ -21,12 +21,12 @@ import (
"sync/atomic"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/atomicbitops"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/procid"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0/pagetables"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/atomicbitops"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/procid"
+ "gvisor.dev/gvisor/pkg/sentry/platform/ring0"
+ "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// machine contains state associated with the VM as a whole.
@@ -135,7 +135,7 @@ type dieState struct {
// newVCPU creates a returns a new vCPU.
//
-// Precondtion: mu must be held.
+// Precondition: mu must be held.
func (m *machine) newVCPU() *vCPU {
id := len(m.vCPUs)
@@ -426,7 +426,12 @@ func (c *vCPU) unlock() {
// Normal state.
case vCPUUser | vCPUGuest | vCPUWaiter:
// Force a transition: this must trigger a notification when we
- // return from guest mode.
+ // return from guest mode. We must clear vCPUWaiter here
+ // anyways, because BounceToKernel will force a transition only
+ // from ring3 to ring0, which will not clear this bit. Halt may
+ // workaround the issue, but if there is no exception or
+ // syscall in this period, BounceToKernel will hang.
+ atomicbitops.AndUint32(&c.state, ^vCPUWaiter)
c.notify()
case vCPUUser | vCPUWaiter:
// Waiting for the lock to be released; the responsibility is
diff --git a/pkg/sentry/platform/kvm/machine_amd64.go b/pkg/sentry/platform/kvm/machine_amd64.go
index b6821122a..c1cbe33be 100644
--- a/pkg/sentry/platform/kvm/machine_amd64.go
+++ b/pkg/sentry/platform/kvm/machine_amd64.go
@@ -22,11 +22,11 @@ import (
"runtime/debug"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0/pagetables"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/platform/ring0"
+ "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// initArchState initializes architecture-specific state.
diff --git a/pkg/sentry/platform/kvm/machine_amd64_unsafe.go b/pkg/sentry/platform/kvm/machine_amd64_unsafe.go
index 06a2e3b0c..506ec9af1 100644
--- a/pkg/sentry/platform/kvm/machine_amd64_unsafe.go
+++ b/pkg/sentry/platform/kvm/machine_amd64_unsafe.go
@@ -22,8 +22,8 @@ import (
"syscall"
"unsafe"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/time"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/time"
)
// setMemoryRegion initializes a region.
@@ -87,7 +87,7 @@ func (c *vCPU) setCPUID() error {
// setSystemTime sets the TSC for the vCPU.
//
-// This has to make the call many times in order to minimize the intrinstic
+// This has to make the call many times in order to minimize the intrinsic
// error in the offset. Unfortunately KVM does not expose a relative offset via
// the API, so this is an approximation. We do this via an iterative algorithm.
// This has the advantage that it can generally deal with highly variable
diff --git a/pkg/sentry/platform/kvm/machine_unsafe.go b/pkg/sentry/platform/kvm/machine_unsafe.go
index 1d3c6d2d6..8d76e106e 100644
--- a/pkg/sentry/platform/kvm/machine_unsafe.go
+++ b/pkg/sentry/platform/kvm/machine_unsafe.go
@@ -25,7 +25,7 @@ import (
"syscall"
"unsafe"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/abi/linux"
)
//go:linkname entersyscall runtime.entersyscall
diff --git a/pkg/sentry/platform/kvm/physical_map.go b/pkg/sentry/platform/kvm/physical_map.go
index 450eb8201..586e91bb2 100644
--- a/pkg/sentry/platform/kvm/physical_map.go
+++ b/pkg/sentry/platform/kvm/physical_map.go
@@ -19,9 +19,9 @@ import (
"sort"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/platform/ring0"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
const (
diff --git a/pkg/sentry/platform/kvm/testutil/BUILD b/pkg/sentry/platform/kvm/testutil/BUILD
index e10087e8e..77a449a8b 100644
--- a/pkg/sentry/platform/kvm/testutil/BUILD
+++ b/pkg/sentry/platform/kvm/testutil/BUILD
@@ -10,6 +10,6 @@ go_library(
"testutil_amd64.go",
"testutil_amd64.s",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/platform/kvm/testutil",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/platform/kvm/testutil",
visibility = ["//pkg/sentry/platform/kvm:__pkg__"],
)
diff --git a/pkg/sentry/platform/kvm/virtual_map.go b/pkg/sentry/platform/kvm/virtual_map.go
index 28a1b4414..2d68855ef 100644
--- a/pkg/sentry/platform/kvm/virtual_map.go
+++ b/pkg/sentry/platform/kvm/virtual_map.go
@@ -22,7 +22,7 @@ import (
"regexp"
"strconv"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
type virtualRegion struct {
diff --git a/pkg/sentry/platform/kvm/virtual_map_test.go b/pkg/sentry/platform/kvm/virtual_map_test.go
index d03ec654a..6a2f145be 100644
--- a/pkg/sentry/platform/kvm/virtual_map_test.go
+++ b/pkg/sentry/platform/kvm/virtual_map_test.go
@@ -18,7 +18,7 @@ import (
"syscall"
"testing"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
type checker struct {
diff --git a/pkg/sentry/platform/mmap_min_addr.go b/pkg/sentry/platform/mmap_min_addr.go
index 90976735b..999787462 100644
--- a/pkg/sentry/platform/mmap_min_addr.go
+++ b/pkg/sentry/platform/mmap_min_addr.go
@@ -20,7 +20,7 @@ import (
"strconv"
"strings"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// systemMMapMinAddrSource is the source file.
diff --git a/pkg/sentry/platform/platform.go b/pkg/sentry/platform/platform.go
index ae37276ad..ec22dbf87 100644
--- a/pkg/sentry/platform/platform.go
+++ b/pkg/sentry/platform/platform.go
@@ -19,11 +19,13 @@ package platform
import (
"fmt"
+ "os"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/seccomp"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/safemem"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// Platform provides abstractions for execution contexts (Context,
@@ -93,6 +95,9 @@ type Platform interface {
// Platforms for which this does not hold may panic if PreemptAllCPUs is
// called.
PreemptAllCPUs() error
+
+ // SyscallFilters returns syscalls made exclusively by this platform.
+ SyscallFilters() seccomp.SyscallRules
}
// NoCPUPreemptionDetection implements Platform.DetectsCPUPreemption and
@@ -256,7 +261,7 @@ type AddressSpaceIO interface {
LoadUint32(addr usermem.Addr) (uint32, error)
}
-// NoAddressSpaceIO implements AddressSpaceIO methods by panicing.
+// NoAddressSpaceIO implements AddressSpaceIO methods by panicking.
type NoAddressSpaceIO struct{}
// CopyOut implements AddressSpaceIO.CopyOut.
@@ -347,3 +352,26 @@ type File interface {
func (fr FileRange) String() string {
return fmt.Sprintf("[%#x, %#x)", fr.Start, fr.End)
}
+
+// Constructor represents a platform type.
+type Constructor interface {
+ New(deviceFile *os.File) (Platform, error)
+ OpenDevice() (*os.File, error)
+}
+
+// platforms contains all available platform types.
+var platforms = map[string]Constructor{}
+
+// Register registers a new platform type.
+func Register(name string, platform Constructor) {
+ platforms[name] = platform
+}
+
+// Lookup looks up the platform constructor by name.
+func Lookup(name string) (Constructor, error) {
+ p, ok := platforms[name]
+ if !ok {
+ return nil, fmt.Errorf("unknown platform: %v", name)
+ }
+ return p, nil
+}
diff --git a/pkg/sentry/platform/ptrace/BUILD b/pkg/sentry/platform/ptrace/BUILD
index 434d003a3..1b6c54e96 100644
--- a/pkg/sentry/platform/ptrace/BUILD
+++ b/pkg/sentry/platform/ptrace/BUILD
@@ -5,6 +5,7 @@ package(licenses = ["notice"])
go_library(
name = "ptrace",
srcs = [
+ "filters.go",
"ptrace.go",
"ptrace_unsafe.go",
"stub_amd64.s",
@@ -15,7 +16,7 @@ go_library(
"subprocess_linux_amd64_unsafe.go",
"subprocess_unsafe.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ptrace",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/platform/ptrace",
visibility = ["//:sandbox"],
deps = [
"//pkg/abi/linux",
diff --git a/pkg/sentry/platform/ptrace/filters.go b/pkg/sentry/platform/ptrace/filters.go
new file mode 100644
index 000000000..1e07cfd0d
--- /dev/null
+++ b/pkg/sentry/platform/ptrace/filters.go
@@ -0,0 +1,33 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package ptrace
+
+import (
+ "syscall"
+
+ "golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/pkg/seccomp"
+)
+
+// SyscallFilters returns syscalls made exclusively by the ptrace platform.
+func (*PTrace) SyscallFilters() seccomp.SyscallRules {
+ return seccomp.SyscallRules{
+ unix.SYS_GETCPU: {},
+ unix.SYS_SCHED_SETAFFINITY: {},
+ syscall.SYS_PTRACE: {},
+ syscall.SYS_TGKILL: {},
+ syscall.SYS_WAIT4: {},
+ }
+}
diff --git a/pkg/sentry/platform/ptrace/ptrace.go b/pkg/sentry/platform/ptrace/ptrace.go
index 6a890dd81..6fd30ed25 100644
--- a/pkg/sentry/platform/ptrace/ptrace.go
+++ b/pkg/sentry/platform/ptrace/ptrace.go
@@ -45,13 +45,14 @@
package ptrace
import (
+ "os"
"sync"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/interrupt"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/platform/interrupt"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
var (
@@ -236,3 +237,17 @@ func (p *PTrace) NewAddressSpace(_ interface{}) (platform.AddressSpace, <-chan s
func (*PTrace) NewContext() platform.Context {
return &context{}
}
+
+type constructor struct{}
+
+func (*constructor) New(*os.File) (platform.Platform, error) {
+ return New()
+}
+
+func (*constructor) OpenDevice() (*os.File, error) {
+ return nil, nil
+}
+
+func init() {
+ platform.Register("ptrace", &constructor{})
+}
diff --git a/pkg/sentry/platform/ptrace/ptrace_unsafe.go b/pkg/sentry/platform/ptrace/ptrace_unsafe.go
index 585f6c1fb..2706039a5 100644
--- a/pkg/sentry/platform/ptrace/ptrace_unsafe.go
+++ b/pkg/sentry/platform/ptrace/ptrace_unsafe.go
@@ -18,8 +18,8 @@ import (
"syscall"
"unsafe"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// GETREGSET/SETREGSET register set types.
diff --git a/pkg/sentry/platform/ptrace/stub_unsafe.go b/pkg/sentry/platform/ptrace/stub_unsafe.go
index 54d5021a9..aa1b87237 100644
--- a/pkg/sentry/platform/ptrace/stub_unsafe.go
+++ b/pkg/sentry/platform/ptrace/stub_unsafe.go
@@ -19,8 +19,8 @@ import (
"syscall"
"unsafe"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/safecopy"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/platform/safecopy"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// stub is defined in arch-specific assembly.
diff --git a/pkg/sentry/platform/ptrace/subprocess.go b/pkg/sentry/platform/ptrace/subprocess.go
index d3b196924..15e84735e 100644
--- a/pkg/sentry/platform/ptrace/subprocess.go
+++ b/pkg/sentry/platform/ptrace/subprocess.go
@@ -21,10 +21,11 @@ import (
"sync"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/procid"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/procid"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// globalPool exists to solve two distinct problems:
@@ -122,7 +123,7 @@ type subprocess struct {
contexts map[*context]struct{}
}
-// newSubprocess returns a useable subprocess.
+// newSubprocess returns a usable subprocess.
//
// This will either be a newly created subprocess, or one from the global pool.
// The create function will be called in the latter case, which is guaranteed
@@ -154,6 +155,7 @@ func newSubprocess(create func() (*thread, error)) (*subprocess, error) {
errChan <- err
return
}
+ firstThread.grabInitRegs()
// Ready to handle requests.
errChan <- nil
@@ -178,6 +180,7 @@ func newSubprocess(create func() (*thread, error)) (*subprocess, error) {
// Detach the thread.
t.detach()
+ t.initRegs = firstThread.initRegs
// Return the thread.
r <- t
@@ -252,7 +255,7 @@ func (s *subprocess) newThread() *thread {
return t
}
-// attach attachs to the thread.
+// attach attaches to the thread.
func (t *thread) attach() {
if _, _, errno := syscall.RawSyscall(syscall.SYS_PTRACE, syscall.PTRACE_ATTACH, uintptr(t.tid), 0); errno != 0 {
panic(fmt.Sprintf("unable to attach: %v", errno))
@@ -268,7 +271,9 @@ func (t *thread) attach() {
// Initialize options.
t.init()
+}
+func (t *thread) grabInitRegs() {
// Grab registers.
//
// Note that we adjust the current register RIP value to be just before
@@ -280,9 +285,9 @@ func (t *thread) attach() {
t.initRegs.Rip -= initRegsRipAdjustment
}
-// detach detachs from the thread.
+// detach detaches from the thread.
//
-// Because the SIGSTOP is not supressed, the thread will enter group-stop.
+// Because the SIGSTOP is not suppressed, the thread will enter group-stop.
func (t *thread) detach() {
if _, _, errno := syscall.RawSyscall6(syscall.SYS_PTRACE, syscall.PTRACE_DETACH, uintptr(t.tid), 0, uintptr(syscall.SIGSTOP), 0, 0); errno != 0 {
panic(fmt.Sprintf("can't detach new clone: %v", errno))
@@ -300,6 +305,18 @@ const (
killed
)
+func (t *thread) dumpAndPanic(message string) {
+ var regs syscall.PtraceRegs
+ message += "\n"
+ if err := t.getRegs(&regs); err == nil {
+ message += dumpRegs(&regs)
+ } else {
+ log.Warningf("unable to get registers: %v", err)
+ }
+ message += fmt.Sprintf("stubStart\t = %016x\n", stubStart)
+ panic(message)
+}
+
// wait waits for a stop event.
//
// Precondition: outcome is a valid waitOutcome.
@@ -320,7 +337,7 @@ func (t *thread) wait(outcome waitOutcome) syscall.Signal {
switch outcome {
case stopped:
if !status.Stopped() {
- panic(fmt.Sprintf("ptrace status unexpected: got %v, wanted stopped", status))
+ t.dumpAndPanic(fmt.Sprintf("ptrace status unexpected: got %v, wanted stopped", status))
}
stopSig := status.StopSignal()
if stopSig == 0 {
@@ -334,12 +351,12 @@ func (t *thread) wait(outcome waitOutcome) syscall.Signal {
return stopSig
case killed:
if !status.Exited() && !status.Signaled() {
- panic(fmt.Sprintf("ptrace status unexpected: got %v, wanted exited", status))
+ t.dumpAndPanic(fmt.Sprintf("ptrace status unexpected: got %v, wanted exited", status))
}
return syscall.Signal(status.ExitStatus())
default:
// Should not happen.
- panic(fmt.Sprintf("unknown outcome: %v", outcome))
+ t.dumpAndPanic(fmt.Sprintf("unknown outcome: %v", outcome))
}
}
}
@@ -357,13 +374,16 @@ func (t *thread) destroy() {
// init initializes trace options.
func (t *thread) init() {
- // Set our TRACESYSGOOD option to differeniate real SIGTRAP.
+ // Set the TRACESYSGOOD option to differentiate real SIGTRAP.
+ // set PTRACE_O_EXITKILL to ensure that the unexpected exit of the
+ // sentry will immediately kill the associated stubs.
+ const PTRACE_O_EXITKILL = 0x100000
_, _, errno := syscall.RawSyscall6(
syscall.SYS_PTRACE,
syscall.PTRACE_SETOPTIONS,
uintptr(t.tid),
0,
- syscall.PTRACE_O_TRACESYSGOOD,
+ syscall.PTRACE_O_TRACESYSGOOD|syscall.PTRACE_O_TRACEEXIT|PTRACE_O_EXITKILL,
0, 0)
if errno != 0 {
panic(fmt.Sprintf("ptrace set options failed: %v", errno))
@@ -406,7 +426,7 @@ func (t *thread) syscall(regs *syscall.PtraceRegs) (uintptr, error) {
// between syscall-enter-stop and syscall-exit-stop; it happens *after*
// syscall-exit-stop.)" - ptrace(2), "Syscall-stops"
if sig := t.wait(stopped); sig != (syscallEvent | syscall.SIGTRAP) {
- panic(fmt.Sprintf("wait failed: expected SIGTRAP, got %v [%d]", sig, sig))
+ t.dumpAndPanic(fmt.Sprintf("wait failed: expected SIGTRAP, got %v [%d]", sig, sig))
}
// Grab registers.
@@ -538,7 +558,7 @@ func (s *subprocess) switchToApp(c *context, ac arch.Context) bool {
if c.signalInfo.Code > 0 {
// The signal was generated by the kernel. We inspect
// the signal information, and may patch it in order to
- // faciliate vsyscall emulation. See patchSignalInfo.
+ // facilitate vsyscall emulation. See patchSignalInfo.
patchSignalInfo(regs, &c.signalInfo)
return false
} else if c.signalInfo.Code <= 0 && c.signalInfo.Pid() == int32(os.Getpid()) {
diff --git a/pkg/sentry/platform/ptrace/subprocess_amd64.go b/pkg/sentry/platform/ptrace/subprocess_amd64.go
index 77a0e908f..a70512913 100644
--- a/pkg/sentry/platform/ptrace/subprocess_amd64.go
+++ b/pkg/sentry/platform/ptrace/subprocess_amd64.go
@@ -17,9 +17,11 @@
package ptrace
import (
+ "fmt"
+ "strings"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
)
const (
@@ -102,3 +104,38 @@ func syscallReturnValue(regs *syscall.PtraceRegs) (uintptr, error) {
}
return uintptr(rval), nil
}
+
+func dumpRegs(regs *syscall.PtraceRegs) string {
+ var m strings.Builder
+
+ fmt.Fprintf(&m, "Registers:\n")
+ fmt.Fprintf(&m, "\tR15\t = %016x\n", regs.R15)
+ fmt.Fprintf(&m, "\tR14\t = %016x\n", regs.R14)
+ fmt.Fprintf(&m, "\tR13\t = %016x\n", regs.R13)
+ fmt.Fprintf(&m, "\tR12\t = %016x\n", regs.R12)
+ fmt.Fprintf(&m, "\tRbp\t = %016x\n", regs.Rbp)
+ fmt.Fprintf(&m, "\tRbx\t = %016x\n", regs.Rbx)
+ fmt.Fprintf(&m, "\tR11\t = %016x\n", regs.R11)
+ fmt.Fprintf(&m, "\tR10\t = %016x\n", regs.R10)
+ fmt.Fprintf(&m, "\tR9\t = %016x\n", regs.R9)
+ fmt.Fprintf(&m, "\tR8\t = %016x\n", regs.R8)
+ fmt.Fprintf(&m, "\tRax\t = %016x\n", regs.Rax)
+ fmt.Fprintf(&m, "\tRcx\t = %016x\n", regs.Rcx)
+ fmt.Fprintf(&m, "\tRdx\t = %016x\n", regs.Rdx)
+ fmt.Fprintf(&m, "\tRsi\t = %016x\n", regs.Rsi)
+ fmt.Fprintf(&m, "\tRdi\t = %016x\n", regs.Rdi)
+ fmt.Fprintf(&m, "\tOrig_rax = %016x\n", regs.Orig_rax)
+ fmt.Fprintf(&m, "\tRip\t = %016x\n", regs.Rip)
+ fmt.Fprintf(&m, "\tCs\t = %016x\n", regs.Cs)
+ fmt.Fprintf(&m, "\tEflags\t = %016x\n", regs.Eflags)
+ fmt.Fprintf(&m, "\tRsp\t = %016x\n", regs.Rsp)
+ fmt.Fprintf(&m, "\tSs\t = %016x\n", regs.Ss)
+ fmt.Fprintf(&m, "\tFs_base\t = %016x\n", regs.Fs_base)
+ fmt.Fprintf(&m, "\tGs_base\t = %016x\n", regs.Gs_base)
+ fmt.Fprintf(&m, "\tDs\t = %016x\n", regs.Ds)
+ fmt.Fprintf(&m, "\tEs\t = %016x\n", regs.Es)
+ fmt.Fprintf(&m, "\tFs\t = %016x\n", regs.Fs)
+ fmt.Fprintf(&m, "\tGs\t = %016x\n", regs.Gs)
+
+ return m.String()
+}
diff --git a/pkg/sentry/platform/ptrace/subprocess_linux.go b/pkg/sentry/platform/ptrace/subprocess_linux.go
index 914be7486..87ded0bbd 100644
--- a/pkg/sentry/platform/ptrace/subprocess_linux.go
+++ b/pkg/sentry/platform/ptrace/subprocess_linux.go
@@ -20,11 +20,11 @@ import (
"fmt"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/procid"
- "gvisor.googlesource.com/gvisor/pkg/seccomp"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/procid"
+ "gvisor.dev/gvisor/pkg/seccomp"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
)
const syscallEvent syscall.Signal = 0x80
@@ -235,6 +235,7 @@ func attachedThread(flags uintptr, defaultAction linux.BPFAction) (*thread, erro
return nil, fmt.Errorf("wait failed: expected SIGSTOP, got %v", sig)
}
t.attach()
+ t.grabInitRegs()
return t, nil
}
@@ -305,7 +306,7 @@ func (s *subprocess) createStub() (*thread, error) {
arch.SyscallArgument{Value: 0},
arch.SyscallArgument{Value: 0})
if err != nil {
- return nil, err
+ return nil, fmt.Errorf("creating stub process: %v", err)
}
// Wait for child to enter group-stop, so we don't stop its
@@ -324,7 +325,7 @@ func (s *subprocess) createStub() (*thread, error) {
arch.SyscallArgument{Value: 0},
arch.SyscallArgument{Value: 0})
if err != nil {
- return nil, err
+ return nil, fmt.Errorf("waiting on stub process: %v", err)
}
childT := &thread{
diff --git a/pkg/sentry/platform/ptrace/subprocess_linux_amd64_unsafe.go b/pkg/sentry/platform/ptrace/subprocess_linux_amd64_unsafe.go
index 1bf7eab28..e977992f9 100644
--- a/pkg/sentry/platform/ptrace/subprocess_linux_amd64_unsafe.go
+++ b/pkg/sentry/platform/ptrace/subprocess_linux_amd64_unsafe.go
@@ -23,7 +23,7 @@ import (
"unsafe"
"golang.org/x/sys/unix"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/abi/linux"
)
// maskPool contains reusable CPU masks for setting affinity. Unfortunately,
diff --git a/pkg/sentry/platform/ring0/BUILD b/pkg/sentry/platform/ring0/BUILD
index ecb3e9a9c..8ed6c7652 100644
--- a/pkg/sentry/platform/ring0/BUILD
+++ b/pkg/sentry/platform/ring0/BUILD
@@ -43,7 +43,7 @@ go_library(
"lib_amd64.s",
"ring0.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/platform/ring0",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/cpuid",
diff --git a/pkg/sentry/platform/ring0/defs.go b/pkg/sentry/platform/ring0/defs.go
index 5bbd4612d..076063f85 100644
--- a/pkg/sentry/platform/ring0/defs.go
+++ b/pkg/sentry/platform/ring0/defs.go
@@ -17,7 +17,7 @@ package ring0
import (
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
var (
diff --git a/pkg/sentry/platform/ring0/defs_amd64.go b/pkg/sentry/platform/ring0/defs_amd64.go
index 413c3dbc4..7206322b1 100644
--- a/pkg/sentry/platform/ring0/defs_amd64.go
+++ b/pkg/sentry/platform/ring0/defs_amd64.go
@@ -17,7 +17,7 @@
package ring0
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0/pagetables"
+ "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
)
// Segment indices and Selectors.
diff --git a/pkg/sentry/platform/ring0/entry_amd64.s b/pkg/sentry/platform/ring0/entry_amd64.s
index 8cb8c4996..02df38331 100644
--- a/pkg/sentry/platform/ring0/entry_amd64.s
+++ b/pkg/sentry/platform/ring0/entry_amd64.s
@@ -15,7 +15,7 @@
#include "funcdata.h"
#include "textflag.h"
-// NB: Offsets are programatically generated (see BUILD).
+// NB: Offsets are programmatically generated (see BUILD).
//
// This file is concatenated with the definitions.
diff --git a/pkg/sentry/platform/ring0/kernel_amd64.go b/pkg/sentry/platform/ring0/kernel_amd64.go
index 3577b5127..0feff8778 100644
--- a/pkg/sentry/platform/ring0/kernel_amd64.go
+++ b/pkg/sentry/platform/ring0/kernel_amd64.go
@@ -70,6 +70,14 @@ func (c *CPU) init() {
c.tss.ist1Lo = uint32(stackAddr)
c.tss.ist1Hi = uint32(stackAddr >> 32)
+ // Set the I/O bitmap base address beyond the last byte in the TSS
+ // to block access to the entire I/O address range.
+ //
+ // From section 18.5.2 "I/O Permission Bit Map" from Intel SDM vol1:
+ // I/O addresses not spanned by the map are treated as if they had set
+ // bits in the map.
+ c.tss.ioPerm = tssLimit + 1
+
// Permanently set the kernel segments.
c.registers.Cs = uint64(Kcode)
c.registers.Ds = uint64(Kdata)
diff --git a/pkg/sentry/platform/ring0/lib_amd64.go b/pkg/sentry/platform/ring0/lib_amd64.go
index 9c5f26962..ca968a036 100644
--- a/pkg/sentry/platform/ring0/lib_amd64.go
+++ b/pkg/sentry/platform/ring0/lib_amd64.go
@@ -17,7 +17,7 @@
package ring0
import (
- "gvisor.googlesource.com/gvisor/pkg/cpuid"
+ "gvisor.dev/gvisor/pkg/cpuid"
)
// LoadFloatingPoint loads floating point state by the most efficient mechanism
diff --git a/pkg/sentry/platform/ring0/pagetables/BUILD b/pkg/sentry/platform/ring0/pagetables/BUILD
index fe93d3030..3b95af617 100644
--- a/pkg/sentry/platform/ring0/pagetables/BUILD
+++ b/pkg/sentry/platform/ring0/pagetables/BUILD
@@ -84,7 +84,7 @@ go_library(
"walker_map.go",
"walker_unmap.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0/pagetables",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables",
visibility = [
"//pkg/sentry/platform/kvm:__subpackages__",
"//pkg/sentry/platform/ring0:__subpackages__",
diff --git a/pkg/sentry/platform/ring0/pagetables/allocator_unsafe.go b/pkg/sentry/platform/ring0/pagetables/allocator_unsafe.go
index 1b996b4e2..a90394a33 100644
--- a/pkg/sentry/platform/ring0/pagetables/allocator_unsafe.go
+++ b/pkg/sentry/platform/ring0/pagetables/allocator_unsafe.go
@@ -17,7 +17,7 @@ package pagetables
import (
"unsafe"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// newAlignedPTEs returns a set of aligned PTEs.
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables.go b/pkg/sentry/platform/ring0/pagetables/pagetables.go
index e5dcaada7..904f1a6de 100644
--- a/pkg/sentry/platform/ring0/pagetables/pagetables.go
+++ b/pkg/sentry/platform/ring0/pagetables/pagetables.go
@@ -21,7 +21,7 @@
package pagetables
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// PageTables is a set of page tables.
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go b/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go
index a1ec4b109..35e917526 100644
--- a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go
+++ b/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go
@@ -19,7 +19,7 @@ package pagetables
import (
"testing"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
func Test2MAnd4K(t *testing.T) {
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_test.go b/pkg/sentry/platform/ring0/pagetables/pagetables_test.go
index 36e424495..6e95ad2b9 100644
--- a/pkg/sentry/platform/ring0/pagetables/pagetables_test.go
+++ b/pkg/sentry/platform/ring0/pagetables/pagetables_test.go
@@ -17,7 +17,7 @@ package pagetables
import (
"testing"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
type mapping struct {
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go b/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go
index ff427fbe9..3e2383c5e 100644
--- a/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go
+++ b/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go
@@ -19,7 +19,7 @@ package pagetables
import (
"sync/atomic"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// archPageTables is architecture-specific data.
diff --git a/pkg/sentry/platform/ring0/x86.go b/pkg/sentry/platform/ring0/x86.go
index 7e5ceafdb..5f80d64e8 100644
--- a/pkg/sentry/platform/ring0/x86.go
+++ b/pkg/sentry/platform/ring0/x86.go
@@ -17,7 +17,7 @@
package ring0
import (
- "gvisor.googlesource.com/gvisor/pkg/cpuid"
+ "gvisor.dev/gvisor/pkg/cpuid"
)
// Useful bits.
diff --git a/pkg/sentry/platform/safecopy/BUILD b/pkg/sentry/platform/safecopy/BUILD
index d97a40297..924d8a6d6 100644
--- a/pkg/sentry/platform/safecopy/BUILD
+++ b/pkg/sentry/platform/safecopy/BUILD
@@ -16,7 +16,7 @@ go_library(
"sighandler_amd64.s",
"sighandler_arm64.s",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/platform/safecopy",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/platform/safecopy",
visibility = ["//pkg/sentry:internal"],
deps = ["//pkg/syserror"],
)
diff --git a/pkg/sentry/platform/safecopy/safecopy.go b/pkg/sentry/platform/safecopy/safecopy.go
index 5126871eb..2fb7e5809 100644
--- a/pkg/sentry/platform/safecopy/safecopy.go
+++ b/pkg/sentry/platform/safecopy/safecopy.go
@@ -22,7 +22,7 @@ import (
"runtime"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// SegvError is returned when a safecopy function receives SIGSEGV.
diff --git a/pkg/sentry/safemem/BUILD b/pkg/sentry/safemem/BUILD
index 3ab453718..fd6dc8e6e 100644
--- a/pkg/sentry/safemem/BUILD
+++ b/pkg/sentry/safemem/BUILD
@@ -10,7 +10,7 @@ go_library(
"safemem.go",
"seq_unsafe.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/safemem",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/safemem",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/sentry/platform/safecopy",
diff --git a/pkg/sentry/safemem/block_unsafe.go b/pkg/sentry/safemem/block_unsafe.go
index 1f72deb61..6f03c94bf 100644
--- a/pkg/sentry/safemem/block_unsafe.go
+++ b/pkg/sentry/safemem/block_unsafe.go
@@ -19,7 +19,7 @@ import (
"reflect"
"unsafe"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/safecopy"
+ "gvisor.dev/gvisor/pkg/sentry/platform/safecopy"
)
// A Block is a range of contiguous bytes, similar to []byte but with the
diff --git a/pkg/sentry/sighandling/BUILD b/pkg/sentry/sighandling/BUILD
index cec3af92e..f561670c7 100644
--- a/pkg/sentry/sighandling/BUILD
+++ b/pkg/sentry/sighandling/BUILD
@@ -8,7 +8,7 @@ go_library(
"sighandling.go",
"sighandling_unsafe.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/sighandling",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/sighandling",
visibility = ["//pkg/sentry:internal"],
deps = ["//pkg/abi/linux"],
)
diff --git a/pkg/sentry/sighandling/sighandling.go b/pkg/sentry/sighandling/sighandling.go
index 659b43363..2f65db70b 100644
--- a/pkg/sentry/sighandling/sighandling.go
+++ b/pkg/sentry/sighandling/sighandling.go
@@ -22,7 +22,7 @@ import (
"reflect"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/abi/linux"
)
// numSignals is the number of normal (non-realtime) signals on Linux.
diff --git a/pkg/sentry/sighandling/sighandling_unsafe.go b/pkg/sentry/sighandling/sighandling_unsafe.go
index aca77888a..eace3766d 100644
--- a/pkg/sentry/sighandling/sighandling_unsafe.go
+++ b/pkg/sentry/sighandling/sighandling_unsafe.go
@@ -20,7 +20,7 @@ import (
"syscall"
"unsafe"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/abi/linux"
)
// TODO(b/34161764): Move to pkg/abi/linux along with definitions in
diff --git a/pkg/sentry/socket/BUILD b/pkg/sentry/socket/BUILD
index 076f953e7..2b03ea87c 100644
--- a/pkg/sentry/socket/BUILD
+++ b/pkg/sentry/socket/BUILD
@@ -5,7 +5,7 @@ load("//tools/go_stateify:defs.bzl", "go_library")
go_library(
name = "socket",
srcs = ["socket.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/socket",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/socket",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
@@ -14,7 +14,6 @@ go_library(
"//pkg/sentry/fs",
"//pkg/sentry/fs/fsutil",
"//pkg/sentry/kernel",
- "//pkg/sentry/kernel/kdefs",
"//pkg/sentry/kernel/time",
"//pkg/sentry/socket/unix/transport",
"//pkg/sentry/usermem",
diff --git a/pkg/sentry/socket/control/BUILD b/pkg/sentry/socket/control/BUILD
index 9f4763906..81dbd7309 100644
--- a/pkg/sentry/socket/control/BUILD
+++ b/pkg/sentry/socket/control/BUILD
@@ -5,9 +5,9 @@ load("//tools/go_stateify:defs.bzl", "go_library")
go_library(
name = "control",
srcs = ["control.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/socket/control",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/socket/control",
imports = [
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs",
+ "gvisor.dev/gvisor/pkg/sentry/fs",
],
visibility = ["//pkg/sentry:internal"],
deps = [
@@ -17,7 +17,6 @@ go_library(
"//pkg/sentry/fs",
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/auth",
- "//pkg/sentry/kernel/kdefs",
"//pkg/sentry/socket/unix/transport",
"//pkg/sentry/usermem",
"//pkg/syserror",
diff --git a/pkg/sentry/socket/control/control.go b/pkg/sentry/socket/control/control.go
index 434d7ca2e..4f4a20dfe 100644
--- a/pkg/sentry/socket/control/control.go
+++ b/pkg/sentry/socket/control/control.go
@@ -17,16 +17,15 @@
package control
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/binary"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
const maxInt = int(^uint(0) >> 1)
@@ -63,7 +62,7 @@ type RightsFiles []*fs.File
func NewSCMRights(t *kernel.Task, fds []int32) (SCMRights, error) {
files := make(RightsFiles, 0, len(fds))
for _, fd := range fds {
- file, _ := t.FDMap().GetDescriptor(kdefs.FD(fd))
+ file := t.GetFile(fd)
if file == nil {
files.Release()
return nil, syserror.EBADF
@@ -109,7 +108,9 @@ func rightsFDs(t *kernel.Task, rights SCMRights, cloexec bool, max int) ([]int32
files, trunc := rights.Files(t, max)
fds := make([]int32, 0, len(files))
for i := 0; i < max && len(files) > 0; i++ {
- fd, err := t.FDMap().NewFDFrom(0, files[0], kernel.FDFlags{cloexec}, t.ThreadGroup().Limits())
+ fd, err := t.NewFDFrom(0, files[0], kernel.FDFlags{
+ CloseOnExec: cloexec,
+ })
files[0].DecRef()
files = files[1:]
if err != nil {
@@ -315,8 +316,7 @@ func PackTimestamp(t *kernel.Task, timestamp int64, buf []byte) []byte {
// Parse parses a raw socket control message into portable objects.
func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte) (transport.ControlMessages, error) {
var (
- fds linux.ControlMessageRights
-
+ fds linux.ControlMessageRights
haveCreds bool
creds linux.ControlMessageCredentials
)
diff --git a/pkg/sentry/socket/epsocket/BUILD b/pkg/sentry/socket/epsocket/BUILD
index 7e2679ea0..1f014f399 100644
--- a/pkg/sentry/socket/epsocket/BUILD
+++ b/pkg/sentry/socket/epsocket/BUILD
@@ -11,7 +11,7 @@ go_library(
"save_restore.go",
"stack.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/socket/epsocket",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/socket/epsocket",
visibility = [
"//pkg/sentry:internal",
],
@@ -28,7 +28,6 @@ go_library(
"//pkg/sentry/inet",
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/auth",
- "//pkg/sentry/kernel/kdefs",
"//pkg/sentry/kernel/time",
"//pkg/sentry/safemem",
"//pkg/sentry/socket",
diff --git a/pkg/sentry/socket/epsocket/device.go b/pkg/sentry/socket/epsocket/device.go
index ab4083efe..85484d5b1 100644
--- a/pkg/sentry/socket/epsocket/device.go
+++ b/pkg/sentry/socket/epsocket/device.go
@@ -14,7 +14,7 @@
package epsocket
-import "gvisor.googlesource.com/gvisor/pkg/sentry/device"
+import "gvisor.dev/gvisor/pkg/sentry/device"
// epsocketDevice is the endpoint socket virtual device.
var epsocketDevice = device.NewAnonDevice()
diff --git a/pkg/sentry/socket/epsocket/epsocket.go b/pkg/sentry/socket/epsocket/epsocket.go
index f67451179..69eff7373 100644
--- a/pkg/sentry/socket/epsocket/epsocket.go
+++ b/pkg/sentry/socket/epsocket/epsocket.go
@@ -31,28 +31,27 @@ import (
"syscall"
"time"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/binary"
- "gvisor.googlesource.com/gvisor/pkg/metric"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/inet"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket"
- "gvisor.googlesource.com/gvisor/pkg/sentry/unimpl"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserr"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/metric"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/inet"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/safemem"
+ "gvisor.dev/gvisor/pkg/sentry/socket"
+ "gvisor.dev/gvisor/pkg/sentry/unimpl"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserr"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/tcp"
+ "gvisor.dev/gvisor/pkg/waiter"
)
func mustCreateMetric(name, description string) *tcpip.StatCounter {
@@ -262,7 +261,7 @@ func New(t *kernel.Task, family int, skType linux.SockType, protocol int, queue
dirent := socket.NewDirent(t, epsocketDevice)
defer dirent.DecRef()
- return fs.NewFile(t, dirent, fs.FileFlags{Read: true, Write: true}, &SocketOperations{
+ return fs.NewFile(t, dirent, fs.FileFlags{Read: true, Write: true, NonSeekable: true}, &SocketOperations{
Queue: queue,
family: family,
Endpoint: endpoint,
@@ -286,14 +285,14 @@ func bytesToIPAddress(addr []byte) tcpip.Address {
// GetAddress reads an sockaddr struct from the given address and converts it
// to the FullAddress format. It supports AF_UNIX, AF_INET and AF_INET6
// addresses.
-func GetAddress(sfamily int, addr []byte) (tcpip.FullAddress, *syserr.Error) {
+func GetAddress(sfamily int, addr []byte, strict bool) (tcpip.FullAddress, *syserr.Error) {
// Make sure we have at least 2 bytes for the address family.
if len(addr) < 2 {
return tcpip.FullAddress{}, syserr.ErrInvalidArgument
}
family := usermem.ByteOrder.Uint16(addr)
- if family != uint16(sfamily) {
+ if family != uint16(sfamily) && (!strict && family != linux.AF_UNSPEC) {
return tcpip.FullAddress{}, syserr.ErrAddressFamilyNotSupported
}
@@ -318,7 +317,7 @@ func GetAddress(sfamily int, addr []byte) (tcpip.FullAddress, *syserr.Error) {
case linux.AF_INET:
var a linux.SockAddrInet
if len(addr) < sockAddrInetSize {
- return tcpip.FullAddress{}, syserr.ErrBadAddress
+ return tcpip.FullAddress{}, syserr.ErrInvalidArgument
}
binary.Unmarshal(addr[:sockAddrInetSize], usermem.ByteOrder, &a)
@@ -331,7 +330,7 @@ func GetAddress(sfamily int, addr []byte) (tcpip.FullAddress, *syserr.Error) {
case linux.AF_INET6:
var a linux.SockAddrInet6
if len(addr) < sockAddrInet6Size {
- return tcpip.FullAddress{}, syserr.ErrBadAddress
+ return tcpip.FullAddress{}, syserr.ErrInvalidArgument
}
binary.Unmarshal(addr[:sockAddrInet6Size], usermem.ByteOrder, &a)
@@ -344,6 +343,9 @@ func GetAddress(sfamily int, addr []byte) (tcpip.FullAddress, *syserr.Error) {
}
return out, nil
+ case linux.AF_UNSPEC:
+ return tcpip.FullAddress{}, nil
+
default:
return tcpip.FullAddress{}, syserr.ErrAddressFamilyNotSupported
}
@@ -466,7 +468,7 @@ func (s *SocketOperations) Readiness(mask waiter.EventMask) waiter.EventMask {
// Connect implements the linux syscall connect(2) for sockets backed by
// tpcip.Endpoint.
func (s *SocketOperations) Connect(t *kernel.Task, sockaddr []byte, blocking bool) *syserr.Error {
- addr, err := GetAddress(s.family, sockaddr)
+ addr, err := GetAddress(s.family, sockaddr, false /* strict */)
if err != nil {
return err
}
@@ -499,7 +501,7 @@ func (s *SocketOperations) Connect(t *kernel.Task, sockaddr []byte, blocking boo
// Bind implements the linux syscall bind(2) for sockets backed by
// tcpip.Endpoint.
func (s *SocketOperations) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error {
- addr, err := GetAddress(s.family, sockaddr)
+ addr, err := GetAddress(s.family, sockaddr, true /* strict */)
if err != nil {
return err
}
@@ -537,7 +539,7 @@ func (s *SocketOperations) blockingAccept(t *kernel.Task) (tcpip.Endpoint, *wait
// Accept implements the linux syscall accept(2) for sockets backed by
// tcpip.Endpoint.
-func (s *SocketOperations) Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (kdefs.FD, interface{}, uint32, *syserr.Error) {
+func (s *SocketOperations) Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (int32, interface{}, uint32, *syserr.Error) {
// Issue the accept request to get the new endpoint.
ep, wq, terr := s.Endpoint.Accept()
if terr != nil {
@@ -575,10 +577,9 @@ func (s *SocketOperations) Accept(t *kernel.Task, peerRequested bool, flags int,
}
}
- fdFlags := kernel.FDFlags{
+ fd, e := t.NewFDFrom(0, ns, kernel.FDFlags{
CloseOnExec: flags&linux.SOCK_CLOEXEC != 0,
- }
- fd, e := t.FDMap().NewFDFrom(0, ns, fdFlags, t.ThreadGroup().Limits())
+ })
t.Kernel().RecordSocket(ns)
@@ -668,12 +669,6 @@ func GetSockOpt(t *kernel.Task, s socket.Socket, ep commonEndpoint, family int,
func getSockOptSocket(t *kernel.Task, s socket.Socket, ep commonEndpoint, family int, skType linux.SockType, name, outLen int) (interface{}, *syserr.Error) {
// TODO(b/124056281): Stop rejecting short optLen values in getsockopt.
switch name {
- case linux.SO_TYPE:
- if outLen < sizeOfInt32 {
- return nil, syserr.ErrInvalidArgument
- }
- return int32(skType), nil
-
case linux.SO_ERROR:
if outLen < sizeOfInt32 {
return nil, syserr.ErrInvalidArgument
@@ -872,6 +867,18 @@ func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interfa
return int32(v), nil
+ case linux.TCP_MAXSEG:
+ if outLen < sizeOfInt32 {
+ return nil, syserr.ErrInvalidArgument
+ }
+
+ var v tcpip.MaxSegOption
+ if err := ep.GetSockOpt(&v); err != nil {
+ return nil, syserr.TranslateNetstackError(err)
+ }
+
+ return int32(v), nil
+
case linux.TCP_KEEPIDLE:
if outLen < sizeOfInt32 {
return nil, syserr.ErrInvalidArgument
@@ -920,6 +927,30 @@ func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interfa
t.Kernel().EmitUnimplementedEvent(t)
+ case linux.TCP_CONGESTION:
+ if outLen <= 0 {
+ return nil, syserr.ErrInvalidArgument
+ }
+
+ var v tcpip.CongestionControlOption
+ if err := ep.GetSockOpt(&v); err != nil {
+ return nil, syserr.TranslateNetstackError(err)
+ }
+
+ // We match linux behaviour here where it returns the lower of
+ // TCP_CA_NAME_MAX bytes or the value of the option length.
+ //
+ // This is Linux's net/tcp.h TCP_CA_NAME_MAX.
+ const tcpCANameMax = 16
+
+ toCopy := tcpCANameMax
+ if outLen < tcpCANameMax {
+ toCopy = outLen
+ }
+ b := make([]byte, toCopy)
+ copy(b, v)
+ return b, nil
+
default:
emitUnimplementedEventTCP(t, name)
}
@@ -1200,6 +1231,14 @@ func setSockOptTCP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *
v := usermem.ByteOrder.Uint32(optVal)
return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.QuickAckOption(v)))
+ case linux.TCP_MAXSEG:
+ if len(optVal) < sizeOfInt32 {
+ return syserr.ErrInvalidArgument
+ }
+
+ v := usermem.ByteOrder.Uint32(optVal)
+ return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.MaxSegOption(v)))
+
case linux.TCP_KEEPIDLE:
if len(optVal) < sizeOfInt32 {
return syserr.ErrInvalidArgument
@@ -1222,6 +1261,12 @@ func setSockOptTCP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *
}
return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.KeepaliveIntervalOption(time.Second * time.Duration(v))))
+ case linux.TCP_CONGESTION:
+ v := tcpip.CongestionControlOption(optVal)
+ if err := ep.SetSockOpt(v); err != nil {
+ return syserr.TranslateNetstackError(err)
+ }
+ return nil
case linux.TCP_REPAIR_OPTIONS:
t.Kernel().EmitUnimplementedEvent(t)
@@ -1695,6 +1740,7 @@ func (s *SocketOperations) coalescingRead(ctx context.Context, dst usermem.IOSeq
// If we managed to copy something, we must deliver it.
if copied > 0 {
+ s.Endpoint.ModerateRecvBuf(copied)
return copied, nil
}
@@ -1899,7 +1945,7 @@ func (s *SocketOperations) SendMsg(t *kernel.Task, src usermem.IOSequence, to []
var addr *tcpip.FullAddress
if len(to) > 0 {
- addrBuf, err := GetAddress(s.family, to)
+ addrBuf, err := GetAddress(s.family, to, true /* strict */)
if err != nil {
return 0, err
}
@@ -1968,7 +2014,7 @@ func (s *SocketOperations) SendMsg(t *kernel.Task, src usermem.IOSequence, to []
}
// Ioctl implements fs.FileOperations.Ioctl.
-func (s *SocketOperations) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
+func (s *SocketOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
// SIOCGSTAMP is implemented by epsocket rather than all commonEndpoint
// sockets.
// TODO(b/78348848): Add a commonEndpoint method to support SIOCGSTAMP.
diff --git a/pkg/sentry/socket/epsocket/provider.go b/pkg/sentry/socket/epsocket/provider.go
index 516582828..6d2b5d038 100644
--- a/pkg/sentry/socket/epsocket/provider.go
+++ b/pkg/sentry/socket/epsocket/provider.go
@@ -17,20 +17,20 @@ package epsocket
import (
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket"
- "gvisor.googlesource.com/gvisor/pkg/syserr"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv6"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/udp"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/socket"
+ "gvisor.dev/gvisor/pkg/syserr"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
+ "gvisor.dev/gvisor/pkg/tcpip/network/ipv6"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/tcp"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/udp"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// provider is an inet socket provider.
@@ -111,7 +111,7 @@ func (p *provider) Socket(t *kernel.Task, stype linux.SockType, protocol int) (*
return nil, syserr.TranslateNetstackError(e)
}
- return New(t, p.family, stype, protocol, wq, ep)
+ return New(t, p.family, stype, int(transProto), wq, ep)
}
// Pair just returns nil sockets (not supported).
diff --git a/pkg/sentry/socket/epsocket/save_restore.go b/pkg/sentry/socket/epsocket/save_restore.go
index feaafb7cc..f7b8c10cc 100644
--- a/pkg/sentry/socket/epsocket/save_restore.go
+++ b/pkg/sentry/socket/epsocket/save_restore.go
@@ -15,7 +15,7 @@
package epsocket
import (
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
)
// afterLoad is invoked by stateify.
diff --git a/pkg/sentry/socket/epsocket/stack.go b/pkg/sentry/socket/epsocket/stack.go
index edefa225b..1627a4f68 100644
--- a/pkg/sentry/socket/epsocket/stack.go
+++ b/pkg/sentry/socket/epsocket/stack.go
@@ -15,14 +15,14 @@
package epsocket
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/inet"
- "gvisor.googlesource.com/gvisor/pkg/syserr"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv6"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/inet"
+ "gvisor.dev/gvisor/pkg/syserr"
+ "gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
+ "gvisor.dev/gvisor/pkg/tcpip/network/ipv6"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/tcp"
)
// Stack implements inet.Stack for netstack/tcpip/stack.Stack.
diff --git a/pkg/sentry/socket/hostinet/BUILD b/pkg/sentry/socket/hostinet/BUILD
index 975f47bc3..a951f1bb0 100644
--- a/pkg/sentry/socket/hostinet/BUILD
+++ b/pkg/sentry/socket/hostinet/BUILD
@@ -12,7 +12,7 @@ go_library(
"socket_unsafe.go",
"stack.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/socket/hostinet",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/socket/hostinet",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
@@ -26,7 +26,6 @@ go_library(
"//pkg/sentry/fs/fsutil",
"//pkg/sentry/inet",
"//pkg/sentry/kernel",
- "//pkg/sentry/kernel/kdefs",
"//pkg/sentry/kernel/time",
"//pkg/sentry/safemem",
"//pkg/sentry/socket",
diff --git a/pkg/sentry/socket/hostinet/device.go b/pkg/sentry/socket/hostinet/device.go
index 4267e3691..27049d65f 100644
--- a/pkg/sentry/socket/hostinet/device.go
+++ b/pkg/sentry/socket/hostinet/device.go
@@ -14,6 +14,6 @@
package hostinet
-import "gvisor.googlesource.com/gvisor/pkg/sentry/device"
+import "gvisor.dev/gvisor/pkg/sentry/device"
var socketDevice = device.NewAnonDevice()
diff --git a/pkg/sentry/socket/hostinet/socket.go b/pkg/sentry/socket/hostinet/socket.go
index c62c8d8f1..7f69406b7 100644
--- a/pkg/sentry/socket/hostinet/socket.go
+++ b/pkg/sentry/socket/hostinet/socket.go
@@ -18,22 +18,21 @@ import (
"fmt"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/binary"
- "gvisor.googlesource.com/gvisor/pkg/fdnotifier"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserr"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/fdnotifier"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/safemem"
+ "gvisor.dev/gvisor/pkg/sentry/socket"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserr"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/waiter"
)
const (
@@ -77,7 +76,7 @@ func newSocketFile(ctx context.Context, family int, stype linux.SockType, protoc
}
dirent := socket.NewDirent(ctx, socketDevice)
defer dirent.DecRef()
- return fs.NewFile(ctx, dirent, fs.FileFlags{NonBlocking: nonblock, Read: true, Write: true}, s), nil
+ return fs.NewFile(ctx, dirent, fs.FileFlags{NonBlocking: nonblock, Read: true, Write: true, NonSeekable: true}, s), nil
}
// Release implements fs.FileOperations.Release.
@@ -190,7 +189,7 @@ func (s *socketOperations) Connect(t *kernel.Task, sockaddr []byte, blocking boo
}
// Accept implements socket.Socket.Accept.
-func (s *socketOperations) Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (kdefs.FD, interface{}, uint32, *syserr.Error) {
+func (s *socketOperations) Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (int32, interface{}, uint32, *syserr.Error) {
var peerAddr []byte
var peerAddrlen uint32
var peerAddrPtr *byte
@@ -236,11 +235,11 @@ func (s *socketOperations) Accept(t *kernel.Task, peerRequested bool, flags int,
}
defer f.DecRef()
- fdFlags := kernel.FDFlags{
+ kfd, kerr := t.NewFDFrom(0, f, kernel.FDFlags{
CloseOnExec: flags&syscall.SOCK_CLOEXEC != 0,
- }
- kfd, kerr := t.FDMap().NewFDFrom(0, f, fdFlags, t.ThreadGroup().Limits())
+ })
t.Kernel().RecordSocket(f)
+
return kfd, peerAddr, peerAddrlen, syserr.FromError(kerr)
}
@@ -288,7 +287,7 @@ func (s *socketOperations) GetSockOpt(t *kernel.Task, level int, name int, outLe
}
case syscall.SOL_SOCKET:
switch name {
- case syscall.SO_ERROR, syscall.SO_KEEPALIVE, syscall.SO_SNDBUF, syscall.SO_RCVBUF, syscall.SO_REUSEADDR, syscall.SO_TYPE:
+ case syscall.SO_ERROR, syscall.SO_KEEPALIVE, syscall.SO_SNDBUF, syscall.SO_RCVBUF, syscall.SO_REUSEADDR:
optlen = sizeofInt32
case syscall.SO_LINGER:
optlen = syscall.SizeofLinger
diff --git a/pkg/sentry/socket/hostinet/socket_unsafe.go b/pkg/sentry/socket/hostinet/socket_unsafe.go
index eed0c7837..6c69ba9c7 100644
--- a/pkg/sentry/socket/hostinet/socket_unsafe.go
+++ b/pkg/sentry/socket/hostinet/socket_unsafe.go
@@ -18,12 +18,13 @@ import (
"syscall"
"unsafe"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserr"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserr"
+ "gvisor.dev/gvisor/pkg/syserror"
)
func firstBytePtr(bs []byte) unsafe.Pointer {
@@ -52,7 +53,7 @@ func writev(fd int, srcs []syscall.Iovec) (uint64, error) {
}
// Ioctl implements fs.FileOperations.Ioctl.
-func (s *socketOperations) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
+func (s *socketOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
switch cmd := uintptr(args[1].Int()); cmd {
case syscall.TIOCINQ, syscall.TIOCOUTQ:
var val int32
diff --git a/pkg/sentry/socket/hostinet/stack.go b/pkg/sentry/socket/hostinet/stack.go
index 9c45991ba..11f94281c 100644
--- a/pkg/sentry/socket/hostinet/stack.go
+++ b/pkg/sentry/socket/hostinet/stack.go
@@ -21,12 +21,12 @@ import (
"strings"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/binary"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/inet"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/inet"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
var defaultRecvBufSize = inet.TCPBufferSize{
diff --git a/pkg/sentry/socket/netlink/BUILD b/pkg/sentry/socket/netlink/BUILD
index 148306329..45ebb2a0e 100644
--- a/pkg/sentry/socket/netlink/BUILD
+++ b/pkg/sentry/socket/netlink/BUILD
@@ -9,7 +9,7 @@ go_library(
"provider.go",
"socket.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/socket/netlink",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/socket/netlink",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
@@ -20,7 +20,6 @@ go_library(
"//pkg/sentry/fs",
"//pkg/sentry/fs/fsutil",
"//pkg/sentry/kernel",
- "//pkg/sentry/kernel/kdefs",
"//pkg/sentry/kernel/time",
"//pkg/sentry/socket",
"//pkg/sentry/socket/netlink/port",
diff --git a/pkg/sentry/socket/netlink/message.go b/pkg/sentry/socket/netlink/message.go
index 5bd3b49ce..ce0a1afd0 100644
--- a/pkg/sentry/socket/netlink/message.go
+++ b/pkg/sentry/socket/netlink/message.go
@@ -18,9 +18,9 @@ import (
"fmt"
"math"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/binary"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// alignUp rounds a length up to an alignment.
diff --git a/pkg/sentry/socket/netlink/port/BUILD b/pkg/sentry/socket/netlink/port/BUILD
index a7370a4ec..9e2e12799 100644
--- a/pkg/sentry/socket/netlink/port/BUILD
+++ b/pkg/sentry/socket/netlink/port/BUILD
@@ -5,7 +5,7 @@ load("//tools/go_stateify:defs.bzl", "go_library", "go_test")
go_library(
name = "port",
srcs = ["port.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/socket/netlink/port",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/socket/netlink/port",
visibility = ["//pkg/sentry:internal"],
)
diff --git a/pkg/sentry/socket/netlink/provider.go b/pkg/sentry/socket/netlink/provider.go
index 5dc103877..689cad997 100644
--- a/pkg/sentry/socket/netlink/provider.go
+++ b/pkg/sentry/socket/netlink/provider.go
@@ -17,12 +17,12 @@ package netlink
import (
"fmt"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket"
- "gvisor.googlesource.com/gvisor/pkg/syserr"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/socket"
+ "gvisor.dev/gvisor/pkg/syserr"
)
// Protocol is the implementation of a netlink socket protocol.
@@ -89,7 +89,7 @@ func (*socketProvider) Socket(t *kernel.Task, stype linux.SockType, protocol int
d := socket.NewDirent(t, netlinkSocketDevice)
defer d.DecRef()
- return fs.NewFile(t, d, fs.FileFlags{Read: true, Write: true}, s), nil
+ return fs.NewFile(t, d, fs.FileFlags{Read: true, Write: true, NonSeekable: true}, s), nil
}
// Pair implements socket.Provider.Pair by returning an error.
diff --git a/pkg/sentry/socket/netlink/route/BUILD b/pkg/sentry/socket/netlink/route/BUILD
index be0419679..5dc8533ec 100644
--- a/pkg/sentry/socket/netlink/route/BUILD
+++ b/pkg/sentry/socket/netlink/route/BUILD
@@ -5,7 +5,7 @@ load("//tools/go_stateify:defs.bzl", "go_library")
go_library(
name = "route",
srcs = ["protocol.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/socket/netlink/route",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/socket/netlink/route",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
diff --git a/pkg/sentry/socket/netlink/route/protocol.go b/pkg/sentry/socket/netlink/route/protocol.go
index 9f0a81403..fb1ff329c 100644
--- a/pkg/sentry/socket/netlink/route/protocol.go
+++ b/pkg/sentry/socket/netlink/route/protocol.go
@@ -18,13 +18,13 @@ package route
import (
"bytes"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/inet"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/netlink"
- "gvisor.googlesource.com/gvisor/pkg/syserr"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/inet"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/socket/netlink"
+ "gvisor.dev/gvisor/pkg/syserr"
)
// commandKind describes the operational class of a message type.
diff --git a/pkg/sentry/socket/netlink/socket.go b/pkg/sentry/socket/netlink/socket.go
index 62659784a..f3d6c1e9b 100644
--- a/pkg/sentry/socket/netlink/socket.go
+++ b/pkg/sentry/socket/netlink/socket.go
@@ -19,25 +19,24 @@ import (
"math"
"sync"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/binary"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/device"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/netlink/port"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserr"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/device"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/socket"
+ "gvisor.dev/gvisor/pkg/sentry/socket/netlink/port"
+ "gvisor.dev/gvisor/pkg/sentry/socket/unix"
+ "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserr"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/waiter"
)
const sizeOfInt32 int = 4
@@ -111,7 +110,7 @@ var _ socket.Socket = (*Socket)(nil)
// NewSocket creates a new Socket.
func NewSocket(t *kernel.Task, skType linux.SockType, protocol Protocol) (*Socket, *syserr.Error) {
// Datagram endpoint used to buffer kernel -> user messages.
- ep := transport.NewConnectionless()
+ ep := transport.NewConnectionless(t)
// Bind the endpoint for good measure so we can connect to it. The
// bound address will never be exposed.
@@ -121,7 +120,7 @@ func NewSocket(t *kernel.Task, skType linux.SockType, protocol Protocol) (*Socke
}
// Create a connection from which the kernel can write messages.
- connection, err := ep.(transport.BoundEndpoint).UnidirectionalConnect()
+ connection, err := ep.(transport.BoundEndpoint).UnidirectionalConnect(t)
if err != nil {
ep.Close()
return nil, err
@@ -173,7 +172,7 @@ func (s *Socket) EventUnregister(e *waiter.Entry) {
}
// Ioctl implements fs.FileOperations.Ioctl.
-func (s *Socket) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
+func (*Socket) Ioctl(context.Context, *fs.File, usermem.IO, arch.SyscallArguments) (uintptr, error) {
// TODO(b/68878065): no ioctls supported.
return 0, syserror.ENOTTY
}
@@ -272,7 +271,7 @@ func (s *Socket) Connect(t *kernel.Task, sockaddr []byte, blocking bool) *syserr
}
// Accept implements socket.Socket.Accept.
-func (s *Socket) Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (kdefs.FD, interface{}, uint32, *syserr.Error) {
+func (s *Socket) Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (int32, interface{}, uint32, *syserr.Error) {
// Netlink sockets never support accept.
return 0, nil, 0, syserr.ErrNotSupported
}
diff --git a/pkg/sentry/socket/rpcinet/BUILD b/pkg/sentry/socket/rpcinet/BUILD
index 33ba20de7..5061dcbde 100644
--- a/pkg/sentry/socket/rpcinet/BUILD
+++ b/pkg/sentry/socket/rpcinet/BUILD
@@ -12,7 +12,7 @@ go_library(
"stack.go",
"stack_unsafe.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/socket/rpcinet",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet",
visibility = ["//pkg/sentry:internal"],
deps = [
":syscall_rpc_go_proto",
@@ -25,7 +25,6 @@ go_library(
"//pkg/sentry/fs/fsutil",
"//pkg/sentry/inet",
"//pkg/sentry/kernel",
- "//pkg/sentry/kernel/kdefs",
"//pkg/sentry/kernel/time",
"//pkg/sentry/socket",
"//pkg/sentry/socket/hostinet",
@@ -52,7 +51,7 @@ proto_library(
go_proto_library(
name = "syscall_rpc_go_proto",
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/socket/rpcinet/syscall_rpc_go_proto",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/syscall_rpc_go_proto",
proto = ":syscall_rpc_proto",
visibility = [
"//visibility:public",
diff --git a/pkg/sentry/socket/rpcinet/conn/BUILD b/pkg/sentry/socket/rpcinet/conn/BUILD
index 4336ae9b4..23eadcb1b 100644
--- a/pkg/sentry/socket/rpcinet/conn/BUILD
+++ b/pkg/sentry/socket/rpcinet/conn/BUILD
@@ -5,7 +5,7 @@ package(licenses = ["notice"])
go_library(
name = "conn",
srcs = ["conn.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/socket/rpcinet/conn",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/conn",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/binary",
diff --git a/pkg/sentry/socket/rpcinet/conn/conn.go b/pkg/sentry/socket/rpcinet/conn/conn.go
index f537c7f63..356adad99 100644
--- a/pkg/sentry/socket/rpcinet/conn/conn.go
+++ b/pkg/sentry/socket/rpcinet/conn/conn.go
@@ -22,11 +22,11 @@ import (
"syscall"
"github.com/golang/protobuf/proto"
- "gvisor.googlesource.com/gvisor/pkg/binary"
- "gvisor.googlesource.com/gvisor/pkg/syserr"
- "gvisor.googlesource.com/gvisor/pkg/unet"
+ "gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/syserr"
+ "gvisor.dev/gvisor/pkg/unet"
- pb "gvisor.googlesource.com/gvisor/pkg/sentry/socket/rpcinet/syscall_rpc_go_proto"
+ pb "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/syscall_rpc_go_proto"
)
type request struct {
diff --git a/pkg/sentry/socket/rpcinet/device.go b/pkg/sentry/socket/rpcinet/device.go
index 44c0a39b7..8cfd5f6e5 100644
--- a/pkg/sentry/socket/rpcinet/device.go
+++ b/pkg/sentry/socket/rpcinet/device.go
@@ -14,6 +14,6 @@
package rpcinet
-import "gvisor.googlesource.com/gvisor/pkg/sentry/device"
+import "gvisor.dev/gvisor/pkg/sentry/device"
var socketDevice = device.NewAnonDevice()
diff --git a/pkg/sentry/socket/rpcinet/notifier/BUILD b/pkg/sentry/socket/rpcinet/notifier/BUILD
index b0b107ddb..a536f2e44 100644
--- a/pkg/sentry/socket/rpcinet/notifier/BUILD
+++ b/pkg/sentry/socket/rpcinet/notifier/BUILD
@@ -5,7 +5,7 @@ package(licenses = ["notice"])
go_library(
name = "notifier",
srcs = ["notifier.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/socket/rpcinet/notifier",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/notifier",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/sentry/socket/rpcinet:syscall_rpc_go_proto",
diff --git a/pkg/sentry/socket/rpcinet/notifier/notifier.go b/pkg/sentry/socket/rpcinet/notifier/notifier.go
index 601e05994..aa157dd51 100644
--- a/pkg/sentry/socket/rpcinet/notifier/notifier.go
+++ b/pkg/sentry/socket/rpcinet/notifier/notifier.go
@@ -20,9 +20,9 @@ import (
"sync"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/rpcinet/conn"
- pb "gvisor.googlesource.com/gvisor/pkg/sentry/socket/rpcinet/syscall_rpc_go_proto"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/conn"
+ pb "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/syscall_rpc_go_proto"
+ "gvisor.dev/gvisor/pkg/waiter"
)
type fdInfo struct {
diff --git a/pkg/sentry/socket/rpcinet/socket.go b/pkg/sentry/socket/rpcinet/socket.go
index c22ff1ff0..ccaaddbfc 100644
--- a/pkg/sentry/socket/rpcinet/socket.go
+++ b/pkg/sentry/socket/rpcinet/socket.go
@@ -19,26 +19,25 @@ import (
"syscall"
"time"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/binary"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/rpcinet/conn"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/rpcinet/notifier"
- pb "gvisor.googlesource.com/gvisor/pkg/sentry/socket/rpcinet/syscall_rpc_go_proto"
- "gvisor.googlesource.com/gvisor/pkg/sentry/unimpl"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserr"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/socket"
+ "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/conn"
+ "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/notifier"
+ pb "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/syscall_rpc_go_proto"
+ "gvisor.dev/gvisor/pkg/sentry/unimpl"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserr"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// socketOperations implements fs.FileOperations and socket.Socket for a socket
@@ -286,7 +285,7 @@ func rpcAccept(t *kernel.Task, fd uint32, peer bool) (*pb.AcceptResponse_ResultP
}
// Accept implements socket.Socket.Accept.
-func (s *socketOperations) Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (kdefs.FD, interface{}, uint32, *syserr.Error) {
+func (s *socketOperations) Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (int32, interface{}, uint32, *syserr.Error) {
payload, se := rpcAccept(t, s.fd, peerRequested)
// Check if we need to block.
@@ -322,7 +321,13 @@ func (s *socketOperations) Accept(t *kernel.Task, peerRequested bool, flags int,
dirent := socket.NewDirent(t, socketDevice)
defer dirent.DecRef()
- file := fs.NewFile(t, dirent, fs.FileFlags{Read: true, Write: true, NonBlocking: flags&linux.SOCK_NONBLOCK != 0}, &socketOperations{
+ fileFlags := fs.FileFlags{
+ Read: true,
+ Write: true,
+ NonSeekable: true,
+ NonBlocking: flags&linux.SOCK_NONBLOCK != 0,
+ }
+ file := fs.NewFile(t, dirent, fileFlags, &socketOperations{
wq: &wq,
fd: payload.Fd,
rpcConn: s.rpcConn,
@@ -330,10 +335,9 @@ func (s *socketOperations) Accept(t *kernel.Task, peerRequested bool, flags int,
})
defer file.DecRef()
- fdFlags := kernel.FDFlags{
+ fd, err := t.NewFDFrom(0, file, kernel.FDFlags{
CloseOnExec: flags&linux.SOCK_CLOEXEC != 0,
- }
- fd, err := t.FDMap().NewFDFrom(0, file, fdFlags, t.ThreadGroup().Limits())
+ })
if err != nil {
return 0, nil, 0, syserr.FromError(err)
}
@@ -558,7 +562,7 @@ func ifconfIoctlFromStack(ctx context.Context, io usermem.IO, ifc *linux.IFConf)
}
// Ioctl implements fs.FileOperations.Ioctl.
-func (s *socketOperations) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
+func (s *socketOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
t := ctx.(*kernel.Task)
cmd := uint32(args[1].Int())
diff --git a/pkg/sentry/socket/rpcinet/stack.go b/pkg/sentry/socket/rpcinet/stack.go
index a1be711df..3038f25a7 100644
--- a/pkg/sentry/socket/rpcinet/stack.go
+++ b/pkg/sentry/socket/rpcinet/stack.go
@@ -18,12 +18,12 @@ import (
"fmt"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/sentry/inet"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/hostinet"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/rpcinet/conn"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/rpcinet/notifier"
- "gvisor.googlesource.com/gvisor/pkg/syserr"
- "gvisor.googlesource.com/gvisor/pkg/unet"
+ "gvisor.dev/gvisor/pkg/sentry/inet"
+ "gvisor.dev/gvisor/pkg/sentry/socket/hostinet"
+ "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/conn"
+ "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/notifier"
+ "gvisor.dev/gvisor/pkg/syserr"
+ "gvisor.dev/gvisor/pkg/unet"
)
// Stack implements inet.Stack for RPC backed sockets.
diff --git a/pkg/sentry/socket/rpcinet/stack_unsafe.go b/pkg/sentry/socket/rpcinet/stack_unsafe.go
index e53f578ba..a94bdad83 100644
--- a/pkg/sentry/socket/rpcinet/stack_unsafe.go
+++ b/pkg/sentry/socket/rpcinet/stack_unsafe.go
@@ -18,11 +18,11 @@ import (
"syscall"
"unsafe"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/binary"
- pb "gvisor.googlesource.com/gvisor/pkg/sentry/socket/rpcinet/syscall_rpc_go_proto"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserr"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/binary"
+ pb "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/syscall_rpc_go_proto"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserr"
)
// NewNetlinkRouteRequest builds a netlink message for getting the RIB,
diff --git a/pkg/sentry/socket/socket.go b/pkg/sentry/socket/socket.go
index d60944b6b..0efa58a58 100644
--- a/pkg/sentry/socket/socket.go
+++ b/pkg/sentry/socket/socket.go
@@ -21,18 +21,17 @@ import (
"fmt"
"sync/atomic"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/device"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserr"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/device"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserr"
+ "gvisor.dev/gvisor/pkg/tcpip"
)
// ControlMessages represents the union of unix control messages and tcpip
@@ -53,7 +52,7 @@ type Socket interface {
// Accept implements the accept4(2) linux syscall.
// Returns fd, real peer address length and error. Real peer address
// length is only set if len(peer) > 0.
- Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (kdefs.FD, interface{}, uint32, *syserr.Error)
+ Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (int32, interface{}, uint32, *syserr.Error)
// Bind implements the bind(2) linux syscall.
Bind(t *kernel.Task, sockaddr []byte) *syserr.Error
@@ -199,7 +198,7 @@ func NewDirent(ctx context.Context, d *device.Device) *fs.Dirent {
User: fs.PermMask{Read: true, Write: true},
}, linux.SOCKFS_MAGIC),
}
- inode := fs.NewInode(iops, fs.NewPseudoMountSource(), fs.StableAttr{
+ inode := fs.NewInode(ctx, iops, fs.NewPseudoMountSource(ctx), fs.StableAttr{
Type: fs.Socket,
DeviceID: d.DeviceID(),
InodeID: ino,
@@ -207,7 +206,7 @@ func NewDirent(ctx context.Context, d *device.Device) *fs.Dirent {
})
// Dirent name matches net/socket.c:sockfs_dname.
- return fs.NewDirent(inode, fmt.Sprintf("socket:[%d]", ino))
+ return fs.NewDirent(ctx, inode, fmt.Sprintf("socket:[%d]", ino))
}
// SendReceiveTimeout stores timeouts for send and receive calls.
diff --git a/pkg/sentry/socket/unix/BUILD b/pkg/sentry/socket/unix/BUILD
index fe6871cc6..da9977fde 100644
--- a/pkg/sentry/socket/unix/BUILD
+++ b/pkg/sentry/socket/unix/BUILD
@@ -9,7 +9,7 @@ go_library(
"io.go",
"unix.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/socket/unix",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
@@ -20,7 +20,6 @@ go_library(
"//pkg/sentry/fs",
"//pkg/sentry/fs/fsutil",
"//pkg/sentry/kernel",
- "//pkg/sentry/kernel/kdefs",
"//pkg/sentry/kernel/time",
"//pkg/sentry/safemem",
"//pkg/sentry/socket",
diff --git a/pkg/sentry/socket/unix/device.go b/pkg/sentry/socket/unix/device.go
index 734d39ee6..db01ac4c9 100644
--- a/pkg/sentry/socket/unix/device.go
+++ b/pkg/sentry/socket/unix/device.go
@@ -14,7 +14,7 @@
package unix
-import "gvisor.googlesource.com/gvisor/pkg/sentry/device"
+import "gvisor.dev/gvisor/pkg/sentry/device"
// unixSocketDevice is the unix socket virtual device.
var unixSocketDevice = device.NewAnonDevice()
diff --git a/pkg/sentry/socket/unix/io.go b/pkg/sentry/socket/unix/io.go
index 5a1475ec2..760c7beab 100644
--- a/pkg/sentry/socket/unix/io.go
+++ b/pkg/sentry/socket/unix/io.go
@@ -15,15 +15,18 @@
package unix
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/safemem"
+ "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
+ "gvisor.dev/gvisor/pkg/tcpip"
)
// EndpointWriter implements safemem.Writer that writes to a transport.Endpoint.
//
// EndpointWriter is not thread-safe.
type EndpointWriter struct {
+ Ctx context.Context
+
// Endpoint is the transport.Endpoint to write to.
Endpoint transport.Endpoint
@@ -37,7 +40,7 @@ type EndpointWriter struct {
// WriteFromBlocks implements safemem.Writer.WriteFromBlocks.
func (w *EndpointWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) {
return safemem.FromVecWriterFunc{func(bufs [][]byte) (int64, error) {
- n, err := w.Endpoint.SendMsg(bufs, w.Control, w.To)
+ n, err := w.Endpoint.SendMsg(w.Ctx, bufs, w.Control, w.To)
if err != nil {
return int64(n), err.ToError()
}
@@ -50,6 +53,8 @@ func (w *EndpointWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error)
//
// EndpointReader is not thread-safe.
type EndpointReader struct {
+ Ctx context.Context
+
// Endpoint is the transport.Endpoint to read from.
Endpoint transport.Endpoint
@@ -81,7 +86,7 @@ type EndpointReader struct {
// ReadToBlocks implements safemem.Reader.ReadToBlocks.
func (r *EndpointReader) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) {
return safemem.FromVecReaderFunc{func(bufs [][]byte) (int64, error) {
- n, ms, c, ct, err := r.Endpoint.RecvMsg(bufs, r.Creds, r.NumRights, r.Peek, r.From)
+ n, ms, c, ct, err := r.Endpoint.RecvMsg(r.Ctx, bufs, r.Creds, r.NumRights, r.Peek, r.From)
r.Control = c
r.ControlTrunc = ct
r.MsgSize = ms
diff --git a/pkg/sentry/socket/unix/transport/BUILD b/pkg/sentry/socket/unix/transport/BUILD
index 52f324eed..0b0240336 100644
--- a/pkg/sentry/socket/unix/transport/BUILD
+++ b/pkg/sentry/socket/unix/transport/BUILD
@@ -25,12 +25,13 @@ go_library(
"transport_message_list.go",
"unix.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport",
visibility = ["//:sandbox"],
deps = [
"//pkg/abi/linux",
"//pkg/ilist",
"//pkg/refs",
+ "//pkg/sentry/context",
"//pkg/syserr",
"//pkg/tcpip",
"//pkg/tcpip/buffer",
diff --git a/pkg/sentry/socket/unix/transport/connectioned.go b/pkg/sentry/socket/unix/transport/connectioned.go
index db79ac904..73d2df15d 100644
--- a/pkg/sentry/socket/unix/transport/connectioned.go
+++ b/pkg/sentry/socket/unix/transport/connectioned.go
@@ -17,10 +17,11 @@ package transport
import (
"sync"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/syserr"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/syserr"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// UniqueIDProvider generates a sequence of unique identifiers useful for,
@@ -111,8 +112,13 @@ type connectionedEndpoint struct {
acceptedChan chan *connectionedEndpoint `state:".([]*connectionedEndpoint)"`
}
+var (
+ _ = BoundEndpoint((*connectionedEndpoint)(nil))
+ _ = Endpoint((*connectionedEndpoint)(nil))
+)
+
// NewConnectioned creates a new unbound connectionedEndpoint.
-func NewConnectioned(stype linux.SockType, uid UniqueIDProvider) Endpoint {
+func NewConnectioned(ctx context.Context, stype linux.SockType, uid UniqueIDProvider) Endpoint {
return &connectionedEndpoint{
baseEndpoint: baseEndpoint{Queue: &waiter.Queue{}},
id: uid.UniqueID(),
@@ -122,7 +128,7 @@ func NewConnectioned(stype linux.SockType, uid UniqueIDProvider) Endpoint {
}
// NewPair allocates a new pair of connected unix-domain connectionedEndpoints.
-func NewPair(stype linux.SockType, uid UniqueIDProvider) (Endpoint, Endpoint) {
+func NewPair(ctx context.Context, stype linux.SockType, uid UniqueIDProvider) (Endpoint, Endpoint) {
a := &connectionedEndpoint{
baseEndpoint: baseEndpoint{Queue: &waiter.Queue{}},
id: uid.UniqueID(),
@@ -137,7 +143,9 @@ func NewPair(stype linux.SockType, uid UniqueIDProvider) (Endpoint, Endpoint) {
}
q1 := &queue{ReaderQueue: a.Queue, WriterQueue: b.Queue, limit: initialLimit}
+ q1.EnableLeakCheck("transport.queue")
q2 := &queue{ReaderQueue: b.Queue, WriterQueue: a.Queue, limit: initialLimit}
+ q2.EnableLeakCheck("transport.queue")
if stype == linux.SOCK_STREAM {
a.receiver = &streamQueueReceiver{queueReceiver: queueReceiver{q1}}
@@ -163,7 +171,7 @@ func NewPair(stype linux.SockType, uid UniqueIDProvider) (Endpoint, Endpoint) {
// NewExternal creates a new externally backed Endpoint. It behaves like a
// socketpair.
-func NewExternal(stype linux.SockType, uid UniqueIDProvider, queue *waiter.Queue, receiver Receiver, connected ConnectedEndpoint) Endpoint {
+func NewExternal(ctx context.Context, stype linux.SockType, uid UniqueIDProvider, queue *waiter.Queue, receiver Receiver, connected ConnectedEndpoint) Endpoint {
return &connectionedEndpoint{
baseEndpoint: baseEndpoint{Queue: queue, receiver: receiver, connected: connected},
id: uid.UniqueID(),
@@ -238,7 +246,7 @@ func (e *connectionedEndpoint) Close() {
}
// BidirectionalConnect implements BoundEndpoint.BidirectionalConnect.
-func (e *connectionedEndpoint) BidirectionalConnect(ce ConnectingEndpoint, returnConnect func(Receiver, ConnectedEndpoint)) *syserr.Error {
+func (e *connectionedEndpoint) BidirectionalConnect(ctx context.Context, ce ConnectingEndpoint, returnConnect func(Receiver, ConnectedEndpoint)) *syserr.Error {
if ce.Type() != e.stype {
return syserr.ErrConnectionRefused
}
@@ -288,12 +296,14 @@ func (e *connectionedEndpoint) BidirectionalConnect(ce ConnectingEndpoint, retur
}
readQueue := &queue{ReaderQueue: ce.WaiterQueue(), WriterQueue: ne.Queue, limit: initialLimit}
+ readQueue.EnableLeakCheck("transport.queue")
ne.connected = &connectedEndpoint{
endpoint: ce,
writeQueue: readQueue,
}
writeQueue := &queue{ReaderQueue: ne.Queue, WriterQueue: ce.WaiterQueue(), limit: initialLimit}
+ writeQueue.EnableLeakCheck("transport.queue")
if e.stype == linux.SOCK_STREAM {
ne.receiver = &streamQueueReceiver{queueReceiver: queueReceiver{readQueue: writeQueue}}
} else {
@@ -334,19 +344,19 @@ func (e *connectionedEndpoint) BidirectionalConnect(ce ConnectingEndpoint, retur
}
// UnidirectionalConnect implements BoundEndpoint.UnidirectionalConnect.
-func (e *connectionedEndpoint) UnidirectionalConnect() (ConnectedEndpoint, *syserr.Error) {
+func (e *connectionedEndpoint) UnidirectionalConnect(ctx context.Context) (ConnectedEndpoint, *syserr.Error) {
return nil, syserr.ErrConnectionRefused
}
// Connect attempts to directly connect to another Endpoint.
// Implements Endpoint.Connect.
-func (e *connectionedEndpoint) Connect(server BoundEndpoint) *syserr.Error {
+func (e *connectionedEndpoint) Connect(ctx context.Context, server BoundEndpoint) *syserr.Error {
returnConnect := func(r Receiver, ce ConnectedEndpoint) {
e.receiver = r
e.connected = ce
}
- return server.BidirectionalConnect(e, returnConnect)
+ return server.BidirectionalConnect(ctx, e, returnConnect)
}
// Listen starts listening on the connection.
@@ -426,13 +436,13 @@ func (e *connectionedEndpoint) Bind(addr tcpip.FullAddress, commit func() *syser
// SendMsg writes data and a control message to the endpoint's peer.
// This method does not block if the data cannot be written.
-func (e *connectionedEndpoint) SendMsg(data [][]byte, c ControlMessages, to BoundEndpoint) (uintptr, *syserr.Error) {
+func (e *connectionedEndpoint) SendMsg(ctx context.Context, data [][]byte, c ControlMessages, to BoundEndpoint) (uintptr, *syserr.Error) {
// Stream sockets do not support specifying the endpoint. Seqpacket
// sockets ignore the passed endpoint.
if e.stype == linux.SOCK_STREAM && to != nil {
return 0, syserr.ErrNotSupported
}
- return e.baseEndpoint.SendMsg(data, c, to)
+ return e.baseEndpoint.SendMsg(ctx, data, c, to)
}
// Readiness returns the current readiness of the connectionedEndpoint. For
diff --git a/pkg/sentry/socket/unix/transport/connectionless.go b/pkg/sentry/socket/unix/transport/connectionless.go
index 81ebfba10..c7f7c5b16 100644
--- a/pkg/sentry/socket/unix/transport/connectionless.go
+++ b/pkg/sentry/socket/unix/transport/connectionless.go
@@ -15,14 +15,15 @@
package transport
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/syserr"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/syserr"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// connectionlessEndpoint is a unix endpoint for unix sockets that support operating in
-// a conectionless fashon.
+// a connectionless fashon.
//
// Specifically, this means datagram unix sockets not created with
// socketpair(2).
@@ -32,10 +33,17 @@ type connectionlessEndpoint struct {
baseEndpoint
}
+var (
+ _ = BoundEndpoint((*connectionlessEndpoint)(nil))
+ _ = Endpoint((*connectionlessEndpoint)(nil))
+)
+
// NewConnectionless creates a new unbound dgram endpoint.
-func NewConnectionless() Endpoint {
+func NewConnectionless(ctx context.Context) Endpoint {
ep := &connectionlessEndpoint{baseEndpoint{Queue: &waiter.Queue{}}}
- ep.receiver = &queueReceiver{readQueue: &queue{ReaderQueue: ep.Queue, WriterQueue: &waiter.Queue{}, limit: initialLimit}}
+ q := queue{ReaderQueue: ep.Queue, WriterQueue: &waiter.Queue{}, limit: initialLimit}
+ q.EnableLeakCheck("transport.queue")
+ ep.receiver = &queueReceiver{readQueue: &q}
return ep
}
@@ -46,38 +54,33 @@ func (e *connectionlessEndpoint) isBound() bool {
// Close puts the endpoint in a closed state and frees all resources associated
// with it.
-//
-// The socket will be a fresh state after a call to close and may be reused.
-// That is, close may be used to "unbind" or "disconnect" the socket in error
-// paths.
func (e *connectionlessEndpoint) Close() {
e.Lock()
- var r Receiver
- if e.Connected() {
- e.receiver.CloseRecv()
- r = e.receiver
- e.receiver = nil
-
+ if e.connected != nil {
e.connected.Release()
e.connected = nil
}
+
if e.isBound() {
e.path = ""
}
+
+ e.receiver.CloseRecv()
+ r := e.receiver
+ e.receiver = nil
e.Unlock()
- if r != nil {
- r.CloseNotify()
- r.Release()
- }
+
+ r.CloseNotify()
+ r.Release()
}
// BidirectionalConnect implements BoundEndpoint.BidirectionalConnect.
-func (e *connectionlessEndpoint) BidirectionalConnect(ce ConnectingEndpoint, returnConnect func(Receiver, ConnectedEndpoint)) *syserr.Error {
+func (e *connectionlessEndpoint) BidirectionalConnect(ctx context.Context, ce ConnectingEndpoint, returnConnect func(Receiver, ConnectedEndpoint)) *syserr.Error {
return syserr.ErrConnectionRefused
}
// UnidirectionalConnect implements BoundEndpoint.UnidirectionalConnect.
-func (e *connectionlessEndpoint) UnidirectionalConnect() (ConnectedEndpoint, *syserr.Error) {
+func (e *connectionlessEndpoint) UnidirectionalConnect(ctx context.Context) (ConnectedEndpoint, *syserr.Error) {
e.Lock()
r := e.receiver
e.Unlock()
@@ -96,12 +99,12 @@ func (e *connectionlessEndpoint) UnidirectionalConnect() (ConnectedEndpoint, *sy
// SendMsg writes data and a control message to the specified endpoint.
// This method does not block if the data cannot be written.
-func (e *connectionlessEndpoint) SendMsg(data [][]byte, c ControlMessages, to BoundEndpoint) (uintptr, *syserr.Error) {
+func (e *connectionlessEndpoint) SendMsg(ctx context.Context, data [][]byte, c ControlMessages, to BoundEndpoint) (uintptr, *syserr.Error) {
if to == nil {
- return e.baseEndpoint.SendMsg(data, c, nil)
+ return e.baseEndpoint.SendMsg(ctx, data, c, nil)
}
- connected, err := to.UnidirectionalConnect()
+ connected, err := to.UnidirectionalConnect(ctx)
if err != nil {
return 0, syserr.ErrInvalidEndpointState
}
@@ -124,13 +127,16 @@ func (e *connectionlessEndpoint) Type() linux.SockType {
}
// Connect attempts to connect directly to server.
-func (e *connectionlessEndpoint) Connect(server BoundEndpoint) *syserr.Error {
- connected, err := server.UnidirectionalConnect()
+func (e *connectionlessEndpoint) Connect(ctx context.Context, server BoundEndpoint) *syserr.Error {
+ connected, err := server.UnidirectionalConnect(ctx)
if err != nil {
return err
}
e.Lock()
+ if e.connected != nil {
+ e.connected.Release()
+ }
e.connected = connected
e.Unlock()
diff --git a/pkg/sentry/socket/unix/transport/queue.go b/pkg/sentry/socket/unix/transport/queue.go
index b650caae7..0415fae9a 100644
--- a/pkg/sentry/socket/unix/transport/queue.go
+++ b/pkg/sentry/socket/unix/transport/queue.go
@@ -17,9 +17,9 @@ package transport
import (
"sync"
- "gvisor.googlesource.com/gvisor/pkg/refs"
- "gvisor.googlesource.com/gvisor/pkg/syserr"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/refs"
+ "gvisor.dev/gvisor/pkg/syserr"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// queue is a buffer queue.
@@ -100,7 +100,7 @@ func (q *queue) IsWritable() bool {
// Enqueue adds an entry to the data queue if room is available.
//
-// If truncate is true, Enqueue may truncate the message beforing enqueuing it.
+// If truncate is true, Enqueue may truncate the message before enqueuing it.
// Otherwise, the entire message must fit. If n < e.Length(), err indicates why.
//
// If notify is true, ReaderQueue.Notify must be called:
diff --git a/pkg/sentry/socket/unix/transport/unix.go b/pkg/sentry/socket/unix/transport/unix.go
index 5c55c529e..b0765ba55 100644
--- a/pkg/sentry/socket/unix/transport/unix.go
+++ b/pkg/sentry/socket/unix/transport/unix.go
@@ -19,11 +19,12 @@ import (
"sync"
"sync/atomic"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/syserr"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/syserr"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// initialLimit is the starting limit for the socket buffers.
@@ -120,13 +121,13 @@ type Endpoint interface {
// CMTruncated indicates that the numRights hint was used to receive fewer
// than the total available SCM_RIGHTS FDs. Additional truncation may be
// required by the caller.
- RecvMsg(data [][]byte, creds bool, numRights uintptr, peek bool, addr *tcpip.FullAddress) (recvLen, msgLen uintptr, cm ControlMessages, CMTruncated bool, err *syserr.Error)
+ RecvMsg(ctx context.Context, data [][]byte, creds bool, numRights uintptr, peek bool, addr *tcpip.FullAddress) (recvLen, msgLen uintptr, cm ControlMessages, CMTruncated bool, err *syserr.Error)
// SendMsg writes data and a control message to the endpoint's peer.
// This method does not block if the data cannot be written.
//
// SendMsg does not take ownership of any of its arguments on error.
- SendMsg([][]byte, ControlMessages, BoundEndpoint) (uintptr, *syserr.Error)
+ SendMsg(context.Context, [][]byte, ControlMessages, BoundEndpoint) (uintptr, *syserr.Error)
// Connect connects this endpoint directly to another.
//
@@ -134,7 +135,7 @@ type Endpoint interface {
// endpoint passed in as a parameter.
//
// The error codes are the same as Connect.
- Connect(server BoundEndpoint) *syserr.Error
+ Connect(ctx context.Context, server BoundEndpoint) *syserr.Error
// Shutdown closes the read and/or write end of the endpoint connection
// to its peer.
@@ -215,7 +216,7 @@ type BoundEndpoint interface {
//
// This method will return syserr.ErrConnectionRefused on endpoints with a
// type that isn't SockStream or SockSeqpacket.
- BidirectionalConnect(ep ConnectingEndpoint, returnConnect func(Receiver, ConnectedEndpoint)) *syserr.Error
+ BidirectionalConnect(ctx context.Context, ep ConnectingEndpoint, returnConnect func(Receiver, ConnectedEndpoint)) *syserr.Error
// UnidirectionalConnect establishes a write-only connection to a unix
// endpoint.
@@ -225,7 +226,7 @@ type BoundEndpoint interface {
//
// This method will return syserr.ErrConnectionRefused on a non-SockDgram
// endpoint.
- UnidirectionalConnect() (ConnectedEndpoint, *syserr.Error)
+ UnidirectionalConnect(ctx context.Context) (ConnectedEndpoint, *syserr.Error)
// Passcred returns whether or not the SO_PASSCRED socket option is
// enabled on this end.
@@ -776,7 +777,7 @@ func (e *baseEndpoint) Connected() bool {
}
// RecvMsg reads data and a control message from the endpoint.
-func (e *baseEndpoint) RecvMsg(data [][]byte, creds bool, numRights uintptr, peek bool, addr *tcpip.FullAddress) (uintptr, uintptr, ControlMessages, bool, *syserr.Error) {
+func (e *baseEndpoint) RecvMsg(ctx context.Context, data [][]byte, creds bool, numRights uintptr, peek bool, addr *tcpip.FullAddress) (uintptr, uintptr, ControlMessages, bool, *syserr.Error) {
e.Lock()
if e.receiver == nil {
@@ -802,7 +803,7 @@ func (e *baseEndpoint) RecvMsg(data [][]byte, creds bool, numRights uintptr, pee
// SendMsg writes data and a control message to the endpoint's peer.
// This method does not block if the data cannot be written.
-func (e *baseEndpoint) SendMsg(data [][]byte, c ControlMessages, to BoundEndpoint) (uintptr, *syserr.Error) {
+func (e *baseEndpoint) SendMsg(ctx context.Context, data [][]byte, c ControlMessages, to BoundEndpoint) (uintptr, *syserr.Error) {
e.Lock()
if !e.Connected() {
e.Unlock()
diff --git a/pkg/sentry/socket/unix/unix.go b/pkg/sentry/socket/unix/unix.go
index b07e8d67b..637168714 100644
--- a/pkg/sentry/socket/unix/unix.go
+++ b/pkg/sentry/socket/unix/unix.go
@@ -21,24 +21,23 @@ import (
"strings"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/refs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/control"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/epsocket"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserr"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/refs"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/socket"
+ "gvisor.dev/gvisor/pkg/sentry/socket/control"
+ "gvisor.dev/gvisor/pkg/sentry/socket/epsocket"
+ "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserr"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// SocketOperations is a Unix socket. It is similar to an epsocket, except it
@@ -64,15 +63,18 @@ type SocketOperations struct {
func New(ctx context.Context, endpoint transport.Endpoint, stype linux.SockType) *fs.File {
dirent := socket.NewDirent(ctx, unixSocketDevice)
defer dirent.DecRef()
- return NewWithDirent(ctx, dirent, endpoint, stype, fs.FileFlags{Read: true, Write: true})
+ return NewWithDirent(ctx, dirent, endpoint, stype, fs.FileFlags{Read: true, Write: true, NonSeekable: true})
}
// NewWithDirent creates a new unix socket using an existing dirent.
func NewWithDirent(ctx context.Context, d *fs.Dirent, ep transport.Endpoint, stype linux.SockType, flags fs.FileFlags) *fs.File {
- return fs.NewFile(ctx, d, flags, &SocketOperations{
+ s := SocketOperations{
ep: ep,
stype: stype,
- })
+ }
+ s.EnableLeakCheck("unix.SocketOperations")
+
+ return fs.NewFile(ctx, d, flags, &s)
}
// DecRef implements RefCounter.DecRef.
@@ -108,7 +110,7 @@ func (s *SocketOperations) Endpoint() transport.Endpoint {
// extractPath extracts and validates the address.
func extractPath(sockaddr []byte) (string, *syserr.Error) {
- addr, err := epsocket.GetAddress(linux.AF_UNIX, sockaddr)
+ addr, err := epsocket.GetAddress(linux.AF_UNIX, sockaddr, true /* strict */)
if err != nil {
return "", err
}
@@ -152,7 +154,7 @@ func (s *SocketOperations) GetSockName(t *kernel.Task) (interface{}, uint32, *sy
}
// Ioctl implements fs.FileOperations.Ioctl.
-func (s *SocketOperations) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
+func (s *SocketOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
return epsocket.Ioctl(ctx, s.ep, io, args)
}
@@ -191,7 +193,7 @@ func (s *SocketOperations) blockingAccept(t *kernel.Task) (transport.Endpoint, *
// Accept implements the linux syscall accept(2) for sockets backed by
// a transport.Endpoint.
-func (s *SocketOperations) Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (kdefs.FD, interface{}, uint32, *syserr.Error) {
+func (s *SocketOperations) Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (int32, interface{}, uint32, *syserr.Error) {
// Issue the accept request to get the new endpoint.
ep, err := s.ep.Accept()
if err != nil {
@@ -226,10 +228,9 @@ func (s *SocketOperations) Accept(t *kernel.Task, peerRequested bool, flags int,
}
}
- fdFlags := kernel.FDFlags{
+ fd, e := t.NewFDFrom(0, ns, kernel.FDFlags{
CloseOnExec: flags&linux.SOCK_CLOEXEC != 0,
- }
- fd, e := t.FDMap().NewFDFrom(0, ns, fdFlags, t.ThreadGroup().Limits())
+ })
if e != nil {
return 0, nil, 0, syserr.FromError(e)
}
@@ -363,7 +364,7 @@ func (s *SocketOperations) Connect(t *kernel.Task, sockaddr []byte, blocking boo
defer ep.Release()
// Connect the server endpoint.
- return s.ep.Connect(ep)
+ return s.ep.Connect(t, ep)
}
// Writev implements fs.FileOperations.Write.
@@ -372,11 +373,12 @@ func (s *SocketOperations) Write(ctx context.Context, _ *fs.File, src usermem.IO
ctrl := control.New(t, s.ep, nil)
if src.NumBytes() == 0 {
- nInt, err := s.ep.SendMsg([][]byte{}, ctrl, nil)
+ nInt, err := s.ep.SendMsg(ctx, [][]byte{}, ctrl, nil)
return int64(nInt), err.ToError()
}
return src.CopyInTo(ctx, &EndpointWriter{
+ Ctx: ctx,
Endpoint: s.ep,
Control: ctrl,
To: nil,
@@ -387,6 +389,7 @@ func (s *SocketOperations) Write(ctx context.Context, _ *fs.File, src usermem.IO
// a transport.Endpoint.
func (s *SocketOperations) SendMsg(t *kernel.Task, src usermem.IOSequence, to []byte, flags int, haveDeadline bool, deadline ktime.Time, controlMessages socket.ControlMessages) (int, *syserr.Error) {
w := EndpointWriter{
+ Ctx: t,
Endpoint: s.ep,
Control: controlMessages.Unix,
To: nil,
@@ -486,6 +489,7 @@ func (s *SocketOperations) Read(ctx context.Context, _ *fs.File, dst usermem.IOS
return 0, nil
}
return dst.CopyOutFrom(ctx, &EndpointReader{
+ Ctx: ctx,
Endpoint: s.ep,
NumRights: 0,
Peek: false,
@@ -522,6 +526,7 @@ func (s *SocketOperations) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags
}
r := EndpointReader{
+ Ctx: t,
Endpoint: s.ep,
Creds: wantCreds,
NumRights: uintptr(numRights),
@@ -635,9 +640,9 @@ func (*provider) Socket(t *kernel.Task, stype linux.SockType, protocol int) (*fs
var ep transport.Endpoint
switch stype {
case linux.SOCK_DGRAM:
- ep = transport.NewConnectionless()
+ ep = transport.NewConnectionless(t)
case linux.SOCK_SEQPACKET, linux.SOCK_STREAM:
- ep = transport.NewConnectioned(stype, t.Kernel())
+ ep = transport.NewConnectioned(t, stype, t.Kernel())
default:
return nil, syserr.ErrInvalidArgument
}
@@ -660,7 +665,7 @@ func (*provider) Pair(t *kernel.Task, stype linux.SockType, protocol int) (*fs.F
}
// Create the endpoints and sockets.
- ep1, ep2 := transport.NewPair(stype, t.Kernel())
+ ep1, ep2 := transport.NewPair(t, stype, t.Kernel())
s1 := New(t, ep1, stype)
s2 := New(t, ep2, stype)
diff --git a/pkg/sentry/state/BUILD b/pkg/sentry/state/BUILD
index cee18f681..f297ef3b7 100644
--- a/pkg/sentry/state/BUILD
+++ b/pkg/sentry/state/BUILD
@@ -9,7 +9,7 @@ go_library(
"state_metadata.go",
"state_unsafe.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/state",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/state",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
diff --git a/pkg/sentry/state/state.go b/pkg/sentry/state/state.go
index 27fde505b..026549756 100644
--- a/pkg/sentry/state/state.go
+++ b/pkg/sentry/state/state.go
@@ -19,12 +19,12 @@ import (
"fmt"
"io"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/inet"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/watchdog"
- "gvisor.googlesource.com/gvisor/pkg/state/statefile"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/inet"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/watchdog"
+ "gvisor.dev/gvisor/pkg/state/statefile"
+ "gvisor.dev/gvisor/pkg/syserror"
)
var previousMetadata map[string]string
diff --git a/pkg/sentry/state/state_metadata.go b/pkg/sentry/state/state_metadata.go
index b8e128c40..cefd20b9b 100644
--- a/pkg/sentry/state/state_metadata.go
+++ b/pkg/sentry/state/state_metadata.go
@@ -18,7 +18,7 @@ import (
"fmt"
"time"
- "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/log"
)
// The save metadata keys for timestamp.
diff --git a/pkg/sentry/state/state_unsafe.go b/pkg/sentry/state/state_unsafe.go
index 7745b6ac6..d271c6fc9 100644
--- a/pkg/sentry/state/state_unsafe.go
+++ b/pkg/sentry/state/state_unsafe.go
@@ -20,7 +20,7 @@ import (
"time"
"unsafe"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/abi/linux"
)
// CPUTime returns the CPU time usage by Sentry and app.
diff --git a/pkg/sentry/strace/BUILD b/pkg/sentry/strace/BUILD
index eaaa4d118..445d25010 100644
--- a/pkg/sentry/strace/BUILD
+++ b/pkg/sentry/strace/BUILD
@@ -18,7 +18,7 @@ go_library(
"strace.go",
"syscalls.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/strace",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/strace",
visibility = ["//:sandbox"],
deps = [
":strace_go_proto",
@@ -30,7 +30,6 @@ go_library(
"//pkg/seccomp",
"//pkg/sentry/arch",
"//pkg/sentry/kernel",
- "//pkg/sentry/kernel/kdefs",
"//pkg/sentry/socket/control",
"//pkg/sentry/socket/epsocket",
"//pkg/sentry/socket/netlink",
@@ -47,7 +46,7 @@ proto_library(
go_proto_library(
name = "strace_go_proto",
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/strace/strace_go_proto",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/strace/strace_go_proto",
proto = ":strace_proto",
visibility = ["//visibility:public"],
)
diff --git a/pkg/sentry/strace/capability.go b/pkg/sentry/strace/capability.go
index f85d6636e..3255dc18d 100644
--- a/pkg/sentry/strace/capability.go
+++ b/pkg/sentry/strace/capability.go
@@ -15,11 +15,11 @@
package strace
import (
- "gvisor.googlesource.com/gvisor/pkg/abi"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/abi"
+ "gvisor.dev/gvisor/pkg/abi/linux"
)
-// CapabilityBitset is the set of capabilties in a bitset.
+// CapabilityBitset is the set of capabilities in a bitset.
var CapabilityBitset = abi.FlagSet{
{
Flag: 1 << uint32(linux.CAP_CHOWN),
diff --git a/pkg/sentry/strace/clone.go b/pkg/sentry/strace/clone.go
index ff6a432c6..e99158712 100644
--- a/pkg/sentry/strace/clone.go
+++ b/pkg/sentry/strace/clone.go
@@ -17,7 +17,7 @@ package strace
import (
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/abi"
+ "gvisor.dev/gvisor/pkg/abi"
)
// CloneFlagSet is the set of clone(2) flags.
diff --git a/pkg/sentry/strace/futex.go b/pkg/sentry/strace/futex.go
index 24301bda6..d55c4080e 100644
--- a/pkg/sentry/strace/futex.go
+++ b/pkg/sentry/strace/futex.go
@@ -15,8 +15,8 @@
package strace
import (
- "gvisor.googlesource.com/gvisor/pkg/abi"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/abi"
+ "gvisor.dev/gvisor/pkg/abi/linux"
)
// FutexCmd are the possible futex(2) commands.
diff --git a/pkg/sentry/strace/open.go b/pkg/sentry/strace/open.go
index 140727b02..e40bcb53b 100644
--- a/pkg/sentry/strace/open.go
+++ b/pkg/sentry/strace/open.go
@@ -17,7 +17,7 @@ package strace
import (
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/abi"
+ "gvisor.dev/gvisor/pkg/abi"
)
// OpenMode represents the mode to open(2) a file.
diff --git a/pkg/sentry/strace/poll.go b/pkg/sentry/strace/poll.go
index 15605187d..5187594a7 100644
--- a/pkg/sentry/strace/poll.go
+++ b/pkg/sentry/strace/poll.go
@@ -18,12 +18,11 @@ import (
"fmt"
"strings"
- "gvisor.googlesource.com/gvisor/pkg/abi"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
- slinux "gvisor.googlesource.com/gvisor/pkg/sentry/syscalls/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/abi"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// PollEventSet is the set of poll(2) event flags.
@@ -50,7 +49,7 @@ func pollFD(t *kernel.Task, pfd *linux.PollFD, post bool) string {
if post {
revents = PollEventSet.Parse(uint64(pfd.REvents))
}
- return fmt.Sprintf("{FD: %s, Events: %s, REvents: %s}", fd(t, kdefs.FD(pfd.FD)), PollEventSet.Parse(uint64(pfd.Events)), revents)
+ return fmt.Sprintf("{FD: %s, Events: %s, REvents: %s}", fd(t, pfd.FD), PollEventSet.Parse(uint64(pfd.Events)), revents)
}
func pollFDs(t *kernel.Task, addr usermem.Addr, nfds uint, post bool) string {
diff --git a/pkg/sentry/strace/ptrace.go b/pkg/sentry/strace/ptrace.go
index 485aacb8a..338bafc6c 100644
--- a/pkg/sentry/strace/ptrace.go
+++ b/pkg/sentry/strace/ptrace.go
@@ -15,8 +15,8 @@
package strace
import (
- "gvisor.googlesource.com/gvisor/pkg/abi"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/abi"
+ "gvisor.dev/gvisor/pkg/abi/linux"
)
// PtraceRequestSet are the possible ptrace(2) requests.
diff --git a/pkg/sentry/strace/signal.go b/pkg/sentry/strace/signal.go
index f82460e1c..5656d53eb 100644
--- a/pkg/sentry/strace/signal.go
+++ b/pkg/sentry/strace/signal.go
@@ -18,10 +18,10 @@ import (
"fmt"
"strings"
- "gvisor.googlesource.com/gvisor/pkg/abi"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/abi"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// signalNames contains the names of all named signals.
diff --git a/pkg/sentry/strace/socket.go b/pkg/sentry/strace/socket.go
index 0b5ef84c4..386b40af7 100644
--- a/pkg/sentry/strace/socket.go
+++ b/pkg/sentry/strace/socket.go
@@ -18,15 +18,15 @@ import (
"fmt"
"strings"
- "gvisor.googlesource.com/gvisor/pkg/abi"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/binary"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/control"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/epsocket"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/netlink"
- slinux "gvisor.googlesource.com/gvisor/pkg/sentry/syscalls/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/abi"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/socket/control"
+ "gvisor.dev/gvisor/pkg/sentry/socket/epsocket"
+ "gvisor.dev/gvisor/pkg/sentry/socket/netlink"
+ slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// SocketFamily are the possible socket(2) families.
@@ -332,7 +332,7 @@ func sockAddr(t *kernel.Task, addr usermem.Addr, length uint32) string {
switch family {
case linux.AF_INET, linux.AF_INET6, linux.AF_UNIX:
- fa, err := epsocket.GetAddress(int(family), b)
+ fa, err := epsocket.GetAddress(int(family), b, true /* strict */)
if err != nil {
return fmt.Sprintf("%#x {Family: %s, error extracting address: %v}", addr, familyStr, err)
}
diff --git a/pkg/sentry/strace/strace.go b/pkg/sentry/strace/strace.go
index f4c1be4ce..311389547 100644
--- a/pkg/sentry/strace/strace.go
+++ b/pkg/sentry/strace/strace.go
@@ -24,17 +24,16 @@ import (
"syscall"
"time"
- "gvisor.googlesource.com/gvisor/pkg/abi"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/bits"
- "gvisor.googlesource.com/gvisor/pkg/eventchannel"
- "gvisor.googlesource.com/gvisor/pkg/seccomp"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
- pb "gvisor.googlesource.com/gvisor/pkg/sentry/strace/strace_go_proto"
- slinux "gvisor.googlesource.com/gvisor/pkg/sentry/syscalls/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/abi"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/bits"
+ "gvisor.dev/gvisor/pkg/eventchannel"
+ "gvisor.dev/gvisor/pkg/seccomp"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ pb "gvisor.dev/gvisor/pkg/sentry/strace/strace_go_proto"
+ slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// DefaultLogMaximumSize is the default LogMaximumSize.
@@ -133,7 +132,7 @@ func path(t *kernel.Task, addr usermem.Addr) string {
return fmt.Sprintf("%#x %s", addr, path)
}
-func fd(t *kernel.Task, fd kdefs.FD) string {
+func fd(t *kernel.Task, fd int32) string {
root := t.FSContext().RootDirectory()
if root != nil {
defer root.DecRef()
@@ -151,7 +150,7 @@ func fd(t *kernel.Task, fd kdefs.FD) string {
return fmt.Sprintf("AT_FDCWD %s", name)
}
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
// Cast FD to uint64 to avoid printing negative hex.
return fmt.Sprintf("%#x (bad FD)", uint64(fd))
@@ -375,7 +374,7 @@ func (i *SyscallInfo) pre(t *kernel.Task, args arch.SyscallArguments, maximumBlo
}
switch i.format[arg] {
case FD:
- output = append(output, fd(t, kdefs.FD(args[arg].Int())))
+ output = append(output, fd(t, args[arg].Int()))
case WriteBuffer:
output = append(output, dump(t, args[arg].Pointer(), args[arg+1].SizeT(), maximumBlobSize))
case WriteIOVec:
diff --git a/pkg/sentry/strace/syscalls.go b/pkg/sentry/strace/syscalls.go
index eae2d6c12..3c389d375 100644
--- a/pkg/sentry/strace/syscalls.go
+++ b/pkg/sentry/strace/syscalls.go
@@ -15,9 +15,9 @@
package strace
import (
- "gvisor.googlesource.com/gvisor/pkg/abi"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/abi"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
)
// FormatSpecifier values describe how an individual syscall argument should be
diff --git a/pkg/sentry/syscalls/BUILD b/pkg/sentry/syscalls/BUILD
index 877318fa9..79d972202 100644
--- a/pkg/sentry/syscalls/BUILD
+++ b/pkg/sentry/syscalls/BUILD
@@ -8,14 +8,13 @@ go_library(
"epoll.go",
"syscalls.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/syscalls",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/syscalls",
visibility = ["//:sandbox"],
deps = [
"//pkg/abi/linux",
"//pkg/sentry/arch",
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/epoll",
- "//pkg/sentry/kernel/kdefs",
"//pkg/sentry/kernel/time",
"//pkg/syserror",
"//pkg/waiter",
diff --git a/pkg/sentry/syscalls/epoll.go b/pkg/sentry/syscalls/epoll.go
index ec1eab331..87dcad18b 100644
--- a/pkg/sentry/syscalls/epoll.go
+++ b/pkg/sentry/syscalls/epoll.go
@@ -15,25 +15,23 @@
package syscalls
import (
- "syscall"
"time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/epoll"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/epoll"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// CreateEpoll implements the epoll_create(2) linux syscall.
-func CreateEpoll(t *kernel.Task, closeOnExec bool) (kdefs.FD, error) {
+func CreateEpoll(t *kernel.Task, closeOnExec bool) (int32, error) {
file := epoll.NewEventPoll(t)
defer file.DecRef()
- flags := kernel.FDFlags{
+ fd, err := t.NewFDFrom(0, file, kernel.FDFlags{
CloseOnExec: closeOnExec,
- }
- fd, err := t.FDMap().NewFDFrom(0, file, flags, t.ThreadGroup().Limits())
+ })
if err != nil {
return 0, err
}
@@ -42,25 +40,25 @@ func CreateEpoll(t *kernel.Task, closeOnExec bool) (kdefs.FD, error) {
}
// AddEpoll implements the epoll_ctl(2) linux syscall when op is EPOLL_CTL_ADD.
-func AddEpoll(t *kernel.Task, epfd kdefs.FD, fd kdefs.FD, flags epoll.EntryFlags, mask waiter.EventMask, userData [2]int32) error {
+func AddEpoll(t *kernel.Task, epfd int32, fd int32, flags epoll.EntryFlags, mask waiter.EventMask, userData [2]int32) error {
// Get epoll from the file descriptor.
- epollfile := t.FDMap().GetFile(epfd)
+ epollfile := t.GetFile(epfd)
if epollfile == nil {
- return syscall.EBADF
+ return syserror.EBADF
}
defer epollfile.DecRef()
// Get the target file id.
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
- return syscall.EBADF
+ return syserror.EBADF
}
defer file.DecRef()
// Extract the epollPoll operations.
e, ok := epollfile.FileOperations.(*epoll.EventPoll)
if !ok {
- return syscall.EBADF
+ return syserror.EBADF
}
// Try to add the entry.
@@ -68,25 +66,25 @@ func AddEpoll(t *kernel.Task, epfd kdefs.FD, fd kdefs.FD, flags epoll.EntryFlags
}
// UpdateEpoll implements the epoll_ctl(2) linux syscall when op is EPOLL_CTL_MOD.
-func UpdateEpoll(t *kernel.Task, epfd kdefs.FD, fd kdefs.FD, flags epoll.EntryFlags, mask waiter.EventMask, userData [2]int32) error {
+func UpdateEpoll(t *kernel.Task, epfd int32, fd int32, flags epoll.EntryFlags, mask waiter.EventMask, userData [2]int32) error {
// Get epoll from the file descriptor.
- epollfile := t.FDMap().GetFile(epfd)
+ epollfile := t.GetFile(epfd)
if epollfile == nil {
- return syscall.EBADF
+ return syserror.EBADF
}
defer epollfile.DecRef()
// Get the target file id.
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
- return syscall.EBADF
+ return syserror.EBADF
}
defer file.DecRef()
// Extract the epollPoll operations.
e, ok := epollfile.FileOperations.(*epoll.EventPoll)
if !ok {
- return syscall.EBADF
+ return syserror.EBADF
}
// Try to update the entry.
@@ -94,25 +92,25 @@ func UpdateEpoll(t *kernel.Task, epfd kdefs.FD, fd kdefs.FD, flags epoll.EntryFl
}
// RemoveEpoll implements the epoll_ctl(2) linux syscall when op is EPOLL_CTL_DEL.
-func RemoveEpoll(t *kernel.Task, epfd kdefs.FD, fd kdefs.FD) error {
+func RemoveEpoll(t *kernel.Task, epfd int32, fd int32) error {
// Get epoll from the file descriptor.
- epollfile := t.FDMap().GetFile(epfd)
+ epollfile := t.GetFile(epfd)
if epollfile == nil {
- return syscall.EBADF
+ return syserror.EBADF
}
defer epollfile.DecRef()
// Get the target file id.
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
- return syscall.EBADF
+ return syserror.EBADF
}
defer file.DecRef()
// Extract the epollPoll operations.
e, ok := epollfile.FileOperations.(*epoll.EventPoll)
if !ok {
- return syscall.EBADF
+ return syserror.EBADF
}
// Try to remove the entry.
@@ -120,18 +118,18 @@ func RemoveEpoll(t *kernel.Task, epfd kdefs.FD, fd kdefs.FD) error {
}
// WaitEpoll implements the epoll_wait(2) linux syscall.
-func WaitEpoll(t *kernel.Task, fd kdefs.FD, max int, timeout int) ([]epoll.Event, error) {
+func WaitEpoll(t *kernel.Task, fd int32, max int, timeout int) ([]epoll.Event, error) {
// Get epoll from the file descriptor.
- epollfile := t.FDMap().GetFile(fd)
+ epollfile := t.GetFile(fd)
if epollfile == nil {
- return nil, syscall.EBADF
+ return nil, syserror.EBADF
}
defer epollfile.DecRef()
// Extract the epollPoll operations.
e, ok := epollfile.FileOperations.(*epoll.EventPoll)
if !ok {
- return nil, syscall.EBADF
+ return nil, syserror.EBADF
}
// Try to read events and return right away if we got them or if the
@@ -164,7 +162,7 @@ func WaitEpoll(t *kernel.Task, fd kdefs.FD, max int, timeout int) ([]epoll.Event
}
if err := t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil {
- if err == syscall.ETIMEDOUT {
+ if err == syserror.ETIMEDOUT {
return nil, nil
}
diff --git a/pkg/sentry/syscalls/linux/BUILD b/pkg/sentry/syscalls/linux/BUILD
index 1c057526b..33a40b9c6 100644
--- a/pkg/sentry/syscalls/linux/BUILD
+++ b/pkg/sentry/syscalls/linux/BUILD
@@ -49,7 +49,7 @@ go_library(
"sys_write.go",
"timespec.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/syscalls/linux",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/syscalls/linux",
visibility = ["//:sandbox"],
deps = [
"//pkg/abi",
@@ -71,7 +71,6 @@ go_library(
"//pkg/sentry/kernel/epoll",
"//pkg/sentry/kernel/eventfd",
"//pkg/sentry/kernel/fasync",
- "//pkg/sentry/kernel/kdefs",
"//pkg/sentry/kernel/pipe",
"//pkg/sentry/kernel/sched",
"//pkg/sentry/kernel/shm",
@@ -86,6 +85,7 @@ go_library(
"//pkg/sentry/syscalls",
"//pkg/sentry/usage",
"//pkg/sentry/usermem",
+ "//pkg/syserr",
"//pkg/syserror",
"//pkg/waiter",
],
diff --git a/pkg/sentry/syscalls/linux/error.go b/pkg/sentry/syscalls/linux/error.go
index 1ba3695fb..264301bfa 100644
--- a/pkg/sentry/syscalls/linux/error.go
+++ b/pkg/sentry/syscalls/linux/error.go
@@ -17,14 +17,13 @@ package linux
import (
"io"
"sync"
- "syscall"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/metric"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/metric"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/syserror"
)
var (
@@ -54,7 +53,7 @@ func handleIOError(t *kernel.Task, partialResult bool, err, intr error, op strin
// Do not consume the error and return it as EFBIG.
// Simultaneously send a SIGXFSZ per setrlimit(2).
t.SendSignal(kernel.SignalInfoNoInfo(linux.SIGXFSZ, t, t))
- return syscall.EFBIG
+ return syserror.EFBIG
case syserror.ErrInterrupted:
// The syscall was interrupted. Return nil if it completed
// partially, otherwise return the error code that the syscall
@@ -92,6 +91,10 @@ func handleIOError(t *kernel.Task, partialResult bool, err, intr error, op strin
// TODO(gvisor.dev/issue/161): In some cases SIGPIPE should
// also be sent to the application.
return nil
+ case syserror.ECONNRESET:
+ // For TCP sendfile connections, we may have a reset. But we
+ // should just return n as the result.
+ return nil
case syserror.ErrWouldBlock:
// Syscall would block, but completed a partial read/write.
// This case should only be returned by IssueIO for nonblocking
diff --git a/pkg/sentry/syscalls/linux/flags.go b/pkg/sentry/syscalls/linux/flags.go
index d83e12971..0c1b5ec27 100644
--- a/pkg/sentry/syscalls/linux/flags.go
+++ b/pkg/sentry/syscalls/linux/flags.go
@@ -15,8 +15,8 @@
package linux
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
)
// flagsToPermissions returns a Permissions object from Linux flags.
diff --git a/pkg/sentry/syscalls/linux/linux64.go b/pkg/sentry/syscalls/linux/linux64.go
index 5251c2463..51db2d8f7 100644
--- a/pkg/sentry/syscalls/linux/linux64.go
+++ b/pkg/sentry/syscalls/linux/linux64.go
@@ -16,15 +16,13 @@
package linux
import (
- "syscall"
-
- "gvisor.googlesource.com/gvisor/pkg/abi"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/syscalls"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/syscalls"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// AUDIT_ARCH_X86_64 identifies the Linux syscall API on AMD64, and is taken
@@ -50,335 +48,336 @@ var AMD64 = &kernel.SyscallTable{
Table: map[uintptr]kernel.Syscall{
0: syscalls.Supported("read", Read),
1: syscalls.Supported("write", Write),
- 2: syscalls.Supported("open", Open),
+ 2: syscalls.PartiallySupported("open", Open, "Options O_DIRECT, O_NOATIME, O_PATH, O_TMPFILE, O_SYNC are not supported.", nil),
3: syscalls.Supported("close", Close),
- 4: syscalls.Undocumented("stat", Stat),
- 5: syscalls.Undocumented("fstat", Fstat),
- 6: syscalls.Undocumented("lstat", Lstat),
- 7: syscalls.Undocumented("poll", Poll),
- 8: syscalls.Undocumented("lseek", Lseek),
- 9: syscalls.Undocumented("mmap", Mmap),
- 10: syscalls.Undocumented("mprotect", Mprotect),
- 11: syscalls.Undocumented("munmap", Munmap),
- 12: syscalls.Undocumented("brk", Brk),
- 13: syscalls.Undocumented("rt_sigaction", RtSigaction),
- 14: syscalls.Undocumented("rt_sigprocmask", RtSigprocmask),
- 15: syscalls.Undocumented("rt_sigreturn", RtSigreturn),
- 16: syscalls.Undocumented("ioctl", Ioctl),
- 17: syscalls.Undocumented("pread64", Pread64),
- 18: syscalls.Undocumented("pwrite64", Pwrite64),
- 19: syscalls.Undocumented("readv", Readv),
- 20: syscalls.Undocumented("writev", Writev),
- 21: syscalls.Undocumented("access", Access),
- 22: syscalls.Undocumented("pipe", Pipe),
- 23: syscalls.Undocumented("select", Select),
- 24: syscalls.Undocumented("sched_yield", SchedYield),
- 25: syscalls.Undocumented("mremap", Mremap),
- 26: syscalls.Undocumented("msync", Msync),
- 27: syscalls.Undocumented("mincore", Mincore),
- 28: syscalls.Undocumented("madvise", Madvise),
- 29: syscalls.Undocumented("shmget", Shmget),
- 30: syscalls.Undocumented("shmat", Shmat),
- 31: syscalls.Undocumented("shmctl", Shmctl),
- 32: syscalls.Undocumented("dup", Dup),
- 33: syscalls.Undocumented("dup2", Dup2),
- 34: syscalls.Undocumented("pause", Pause),
- 35: syscalls.Undocumented("nanosleep", Nanosleep),
- 36: syscalls.Undocumented("getitimer", Getitimer),
- 37: syscalls.Undocumented("alarm", Alarm),
- 38: syscalls.Undocumented("setitimer", Setitimer),
- 39: syscalls.Undocumented("getpid", Getpid),
- 40: syscalls.Undocumented("sendfile", Sendfile),
- 41: syscalls.Undocumented("socket", Socket),
- 42: syscalls.Undocumented("connect", Connect),
- 43: syscalls.Undocumented("accept", Accept),
- 44: syscalls.Undocumented("sendto", SendTo),
- 45: syscalls.Undocumented("recvfrom", RecvFrom),
- 46: syscalls.Undocumented("sendmsg", SendMsg),
- 47: syscalls.Undocumented("recvmsg", RecvMsg),
- 48: syscalls.Undocumented("shutdown", Shutdown),
- 49: syscalls.Undocumented("bind", Bind),
- 50: syscalls.Undocumented("listen", Listen),
- 51: syscalls.Undocumented("getsockname", GetSockName),
- 52: syscalls.Undocumented("getpeername", GetPeerName),
- 53: syscalls.Undocumented("socketpair", SocketPair),
- 54: syscalls.Undocumented("setsockopt", SetSockOpt),
- 55: syscalls.Undocumented("getsockopt", GetSockOpt),
- 56: syscalls.Undocumented("clone", Clone),
- 57: syscalls.Undocumented("fork", Fork),
- 58: syscalls.Undocumented("vfork", Vfork),
- 59: syscalls.Undocumented("execve", Execve),
- 60: syscalls.Undocumented("exit", Exit),
- 61: syscalls.Undocumented("wait4", Wait4),
- 62: syscalls.Undocumented("kill", Kill),
- 63: syscalls.Undocumented("uname", Uname),
- 64: syscalls.Undocumented("semget", Semget),
- 65: syscalls.Undocumented("semop", Semop),
- 66: syscalls.Undocumented("semctl", Semctl),
- 67: syscalls.Undocumented("shmdt", Shmdt),
- 68: syscalls.ErrorWithEvent("msgget", syscall.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921)
- 69: syscalls.ErrorWithEvent("msgsnd", syscall.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921)
- 70: syscalls.ErrorWithEvent("msgrcv", syscall.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921)
- 71: syscalls.ErrorWithEvent("msgctl", syscall.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921)
- 72: syscalls.Undocumented("fcntl", Fcntl),
- 73: syscalls.Undocumented("flock", Flock),
- 74: syscalls.Undocumented("fsync", Fsync),
- 75: syscalls.Undocumented("fdatasync", Fdatasync),
- 76: syscalls.Undocumented("truncate", Truncate),
- 77: syscalls.Undocumented("ftruncate", Ftruncate),
- 78: syscalls.Undocumented("getdents", Getdents),
- 79: syscalls.Undocumented("getcwd", Getcwd),
- 80: syscalls.Undocumented("chdir", Chdir),
- 81: syscalls.Undocumented("fchdir", Fchdir),
- 82: syscalls.Undocumented("rename", Rename),
- 83: syscalls.Undocumented("mkdir", Mkdir),
- 84: syscalls.Undocumented("rmdir", Rmdir),
- 85: syscalls.Undocumented("creat", Creat),
- 86: syscalls.Undocumented("link", Link),
- 87: syscalls.Undocumented("link", Unlink),
- 88: syscalls.Undocumented("symlink", Symlink),
- 89: syscalls.Undocumented("readlink", Readlink),
- 90: syscalls.Undocumented("chmod", Chmod),
- 91: syscalls.Undocumented("fchmod", Fchmod),
- 92: syscalls.Undocumented("chown", Chown),
- 93: syscalls.Undocumented("fchown", Fchown),
- 94: syscalls.Undocumented("lchown", Lchown),
- 95: syscalls.Undocumented("umask", Umask),
- 96: syscalls.Undocumented("gettimeofday", Gettimeofday),
- 97: syscalls.Undocumented("getrlimit", Getrlimit),
- 98: syscalls.Undocumented("getrusage", Getrusage),
- 99: syscalls.Undocumented("sysinfo", Sysinfo),
- 100: syscalls.Undocumented("times", Times),
- 101: syscalls.Undocumented("ptrace", Ptrace),
- 102: syscalls.Undocumented("getuid", Getuid),
- 103: syscalls.Undocumented("syslog", Syslog),
- 104: syscalls.Undocumented("getgid", Getgid),
- 105: syscalls.Undocumented("setuid", Setuid),
- 106: syscalls.Undocumented("setgid", Setgid),
- 107: syscalls.Undocumented("geteuid", Geteuid),
- 108: syscalls.Undocumented("getegid", Getegid),
- 109: syscalls.Undocumented("setpgid", Setpgid),
- 110: syscalls.Undocumented("getppid", Getppid),
- 111: syscalls.Undocumented("getpgrp", Getpgrp),
- 112: syscalls.Undocumented("setsid", Setsid),
- 113: syscalls.Undocumented("setreuid", Setreuid),
- 114: syscalls.Undocumented("setregid", Setregid),
- 115: syscalls.Undocumented("getgroups", Getgroups),
- 116: syscalls.Undocumented("setgroups", Setgroups),
- 117: syscalls.Undocumented("setresuid", Setresuid),
- 118: syscalls.Undocumented("getresuid", Getresuid),
- 119: syscalls.Undocumented("setresgid", Setresgid),
- 120: syscalls.Undocumented("setresgid", Getresgid),
- 121: syscalls.Undocumented("getpgid", Getpgid),
- 122: syscalls.ErrorWithEvent("setfsuid", syscall.ENOSYS, "", []string{"gvisor.dev/issue/260"}), // TODO(b/112851702)
- 123: syscalls.ErrorWithEvent("setfsgid", syscall.ENOSYS, "", []string{"gvisor.dev/issue/260"}), // TODO(b/112851702)
- 124: syscalls.Undocumented("getsid", Getsid),
- 125: syscalls.Undocumented("capget", Capget),
- 126: syscalls.Undocumented("capset", Capset),
- 127: syscalls.Undocumented("rt_sigpending", RtSigpending),
- 128: syscalls.Undocumented("rt_sigtimedwait", RtSigtimedwait),
- 129: syscalls.Undocumented("rt_sigqueueinfo", RtSigqueueinfo),
- 130: syscalls.Undocumented("rt_sigsuspend", RtSigsuspend),
- 131: syscalls.Undocumented("sigaltstack", Sigaltstack),
- 132: syscalls.Undocumented("utime", Utime),
- 133: syscalls.Undocumented("mknod", Mknod),
- 134: syscalls.Error("uselib", syscall.ENOSYS, "Obsolete", nil),
- 135: syscalls.ErrorWithEvent("personality", syscall.EINVAL, "Unable to change personality.", nil),
- 136: syscalls.ErrorWithEvent("ustat", syscall.ENOSYS, "Needs filesystem support.", nil),
- 137: syscalls.Undocumented("statfs", Statfs),
- 138: syscalls.Undocumented("fstatfs", Fstatfs),
- 139: syscalls.ErrorWithEvent("sysfs", syscall.ENOSYS, "", []string{"gvisor.dev/issue/165"}),
- 140: syscalls.Undocumented("getpriority", Getpriority),
- 141: syscalls.Undocumented("setpriority", Setpriority),
+ 4: syscalls.Supported("stat", Stat),
+ 5: syscalls.Supported("fstat", Fstat),
+ 6: syscalls.Supported("lstat", Lstat),
+ 7: syscalls.Supported("poll", Poll),
+ 8: syscalls.Supported("lseek", Lseek),
+ 9: syscalls.PartiallySupported("mmap", Mmap, "Generally supported with exceptions. Options MAP_FIXED_NOREPLACE, MAP_SHARED_VALIDATE, MAP_SYNC MAP_GROWSDOWN, MAP_HUGETLB are not supported.", nil),
+ 10: syscalls.Supported("mprotect", Mprotect),
+ 11: syscalls.Supported("munmap", Munmap),
+ 12: syscalls.Supported("brk", Brk),
+ 13: syscalls.Supported("rt_sigaction", RtSigaction),
+ 14: syscalls.Supported("rt_sigprocmask", RtSigprocmask),
+ 15: syscalls.Supported("rt_sigreturn", RtSigreturn),
+ 16: syscalls.PartiallySupported("ioctl", Ioctl, "Only a few ioctls are implemented for backing devices and file systems.", nil),
+ 17: syscalls.Supported("pread64", Pread64),
+ 18: syscalls.Supported("pwrite64", Pwrite64),
+ 19: syscalls.Supported("readv", Readv),
+ 20: syscalls.Supported("writev", Writev),
+ 21: syscalls.Supported("access", Access),
+ 22: syscalls.Supported("pipe", Pipe),
+ 23: syscalls.Supported("select", Select),
+ 24: syscalls.Supported("sched_yield", SchedYield),
+ 25: syscalls.Supported("mremap", Mremap),
+ 26: syscalls.PartiallySupported("msync", Msync, "Full data flush is not guaranteed at this time.", nil),
+ 27: syscalls.PartiallySupported("mincore", Mincore, "Stub implementation. The sandbox does not have access to this information. Reports all mapped pages are resident.", nil),
+ 28: syscalls.PartiallySupported("madvise", Madvise, "Options MADV_DONTNEED, MADV_DONTFORK are supported. Other advice is ignored.", nil),
+ 29: syscalls.PartiallySupported("shmget", Shmget, "Option SHM_HUGETLB is not supported.", nil),
+ 30: syscalls.PartiallySupported("shmat", Shmat, "Option SHM_RND is not supported.", nil),
+ 31: syscalls.PartiallySupported("shmctl", Shmctl, "Options SHM_LOCK, SHM_UNLOCK are not supported.", nil),
+ 32: syscalls.Supported("dup", Dup),
+ 33: syscalls.Supported("dup2", Dup2),
+ 34: syscalls.Supported("pause", Pause),
+ 35: syscalls.Supported("nanosleep", Nanosleep),
+ 36: syscalls.Supported("getitimer", Getitimer),
+ 37: syscalls.Supported("alarm", Alarm),
+ 38: syscalls.Supported("setitimer", Setitimer),
+ 39: syscalls.Supported("getpid", Getpid),
+ 40: syscalls.Supported("sendfile", Sendfile),
+ 41: syscalls.PartiallySupported("socket", Socket, "Limited support for AF_NETLINK, NETLINK_ROUTE sockets. Limited support for SOCK_RAW.", nil),
+ 42: syscalls.Supported("connect", Connect),
+ 43: syscalls.Supported("accept", Accept),
+ 44: syscalls.Supported("sendto", SendTo),
+ 45: syscalls.Supported("recvfrom", RecvFrom),
+ 46: syscalls.Supported("sendmsg", SendMsg),
+ 47: syscalls.PartiallySupported("recvmsg", RecvMsg, "Not all flags and control messages are supported.", nil),
+ 48: syscalls.PartiallySupported("shutdown", Shutdown, "Not all flags and control messages are supported.", nil),
+ 49: syscalls.PartiallySupported("bind", Bind, "Autobind for abstract Unix sockets is not supported.", nil),
+ 50: syscalls.Supported("listen", Listen),
+ 51: syscalls.Supported("getsockname", GetSockName),
+ 52: syscalls.Supported("getpeername", GetPeerName),
+ 53: syscalls.Supported("socketpair", SocketPair),
+ 54: syscalls.PartiallySupported("setsockopt", SetSockOpt, "Not all socket options are supported.", nil),
+ 55: syscalls.PartiallySupported("getsockopt", GetSockOpt, "Not all socket options are supported.", nil),
+ 56: syscalls.PartiallySupported("clone", Clone, "Mount namespace (CLONE_NEWNS) not supported. Options CLONE_PARENT, CLONE_SYSVSEM not supported.", nil),
+ 57: syscalls.Supported("fork", Fork),
+ 58: syscalls.Supported("vfork", Vfork),
+ 59: syscalls.Supported("execve", Execve),
+ 60: syscalls.Supported("exit", Exit),
+ 61: syscalls.Supported("wait4", Wait4),
+ 62: syscalls.Supported("kill", Kill),
+ 63: syscalls.Supported("uname", Uname),
+ 64: syscalls.Supported("semget", Semget),
+ 65: syscalls.PartiallySupported("semop", Semop, "Option SEM_UNDO not supported.", nil),
+ 66: syscalls.PartiallySupported("semctl", Semctl, "Options IPC_INFO, SEM_INFO, IPC_STAT, SEM_STAT, SEM_STAT_ANY, GETNCNT, GETZCNT not supported.", nil),
+ 67: syscalls.Supported("shmdt", Shmdt),
+ 68: syscalls.ErrorWithEvent("msgget", syserror.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921)
+ 69: syscalls.ErrorWithEvent("msgsnd", syserror.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921)
+ 70: syscalls.ErrorWithEvent("msgrcv", syserror.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921)
+ 71: syscalls.ErrorWithEvent("msgctl", syserror.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921)
+ 72: syscalls.PartiallySupported("fcntl", Fcntl, "Not all options are supported.", nil),
+ 73: syscalls.PartiallySupported("flock", Flock, "Locks are held within the sandbox only.", nil),
+ 74: syscalls.PartiallySupported("fsync", Fsync, "Full data flush is not guaranteed at this time.", nil),
+ 75: syscalls.PartiallySupported("fdatasync", Fdatasync, "Full data flush is not guaranteed at this time.", nil),
+ 76: syscalls.Supported("truncate", Truncate),
+ 77: syscalls.Supported("ftruncate", Ftruncate),
+ 78: syscalls.Supported("getdents", Getdents),
+ 79: syscalls.Supported("getcwd", Getcwd),
+ 80: syscalls.Supported("chdir", Chdir),
+ 81: syscalls.Supported("fchdir", Fchdir),
+ 82: syscalls.Supported("rename", Rename),
+ 83: syscalls.Supported("mkdir", Mkdir),
+ 84: syscalls.Supported("rmdir", Rmdir),
+ 85: syscalls.Supported("creat", Creat),
+ 86: syscalls.Supported("link", Link),
+ 87: syscalls.Supported("unlink", Unlink),
+ 88: syscalls.Supported("symlink", Symlink),
+ 89: syscalls.Supported("readlink", Readlink),
+ 90: syscalls.Supported("chmod", Chmod),
+ 91: syscalls.PartiallySupported("fchmod", Fchmod, "Options S_ISUID and S_ISGID not supported.", nil),
+ 92: syscalls.Supported("chown", Chown),
+ 93: syscalls.Supported("fchown", Fchown),
+ 94: syscalls.Supported("lchown", Lchown),
+ 95: syscalls.Supported("umask", Umask),
+ 96: syscalls.Supported("gettimeofday", Gettimeofday),
+ 97: syscalls.Supported("getrlimit", Getrlimit),
+ 98: syscalls.PartiallySupported("getrusage", Getrusage, "Fields ru_maxrss, ru_minflt, ru_majflt, ru_inblock, ru_oublock are not supported. Fields ru_utime and ru_stime have low precision.", nil),
+ 99: syscalls.PartiallySupported("sysinfo", Sysinfo, "Fields loads, sharedram, bufferram, totalswap, freeswap, totalhigh, freehigh not supported.", nil),
+ 100: syscalls.Supported("times", Times),
+ 101: syscalls.PartiallySupported("ptrace", Ptrace, "Options PTRACE_PEEKSIGINFO, PTRACE_SECCOMP_GET_FILTER not supported.", nil),
+ 102: syscalls.Supported("getuid", Getuid),
+ 103: syscalls.PartiallySupported("syslog", Syslog, "Outputs a dummy message for security reasons.", nil),
+ 104: syscalls.Supported("getgid", Getgid),
+ 105: syscalls.Supported("setuid", Setuid),
+ 106: syscalls.Supported("setgid", Setgid),
+ 107: syscalls.Supported("geteuid", Geteuid),
+ 108: syscalls.Supported("getegid", Getegid),
+ 109: syscalls.Supported("setpgid", Setpgid),
+ 110: syscalls.Supported("getppid", Getppid),
+ 111: syscalls.Supported("getpgrp", Getpgrp),
+ 112: syscalls.Supported("setsid", Setsid),
+ 113: syscalls.Supported("setreuid", Setreuid),
+ 114: syscalls.Supported("setregid", Setregid),
+ 115: syscalls.Supported("getgroups", Getgroups),
+ 116: syscalls.Supported("setgroups", Setgroups),
+ 117: syscalls.Supported("setresuid", Setresuid),
+ 118: syscalls.Supported("getresuid", Getresuid),
+ 119: syscalls.Supported("setresgid", Setresgid),
+ 120: syscalls.Supported("getresgid", Getresgid),
+ 121: syscalls.Supported("getpgid", Getpgid),
+ 122: syscalls.ErrorWithEvent("setfsuid", syserror.ENOSYS, "", []string{"gvisor.dev/issue/260"}), // TODO(b/112851702)
+ 123: syscalls.ErrorWithEvent("setfsgid", syserror.ENOSYS, "", []string{"gvisor.dev/issue/260"}), // TODO(b/112851702)
+ 124: syscalls.Supported("getsid", Getsid),
+ 125: syscalls.Supported("capget", Capget),
+ 126: syscalls.Supported("capset", Capset),
+ 127: syscalls.Supported("rt_sigpending", RtSigpending),
+ 128: syscalls.Supported("rt_sigtimedwait", RtSigtimedwait),
+ 129: syscalls.Supported("rt_sigqueueinfo", RtSigqueueinfo),
+ 130: syscalls.Supported("rt_sigsuspend", RtSigsuspend),
+ 131: syscalls.Supported("sigaltstack", Sigaltstack),
+ 132: syscalls.Supported("utime", Utime),
+ 133: syscalls.PartiallySupported("mknod", Mknod, "Device creation is not generally supported. Only regular file and FIFO creation are supported.", nil),
+ 134: syscalls.Error("uselib", syserror.ENOSYS, "Obsolete", nil),
+ 135: syscalls.ErrorWithEvent("personality", syserror.EINVAL, "Unable to change personality.", nil),
+ 136: syscalls.ErrorWithEvent("ustat", syserror.ENOSYS, "Needs filesystem support.", nil),
+ 137: syscalls.PartiallySupported("statfs", Statfs, "Depends on the backing file system implementation.", nil),
+ 138: syscalls.PartiallySupported("fstatfs", Fstatfs, "Depends on the backing file system implementation.", nil),
+ 139: syscalls.ErrorWithEvent("sysfs", syserror.ENOSYS, "", []string{"gvisor.dev/issue/165"}),
+ 140: syscalls.PartiallySupported("getpriority", Getpriority, "Stub implementation.", nil),
+ 141: syscalls.PartiallySupported("setpriority", Setpriority, "Stub implementation.", nil),
142: syscalls.CapError("sched_setparam", linux.CAP_SYS_NICE, "", nil),
- 143: syscalls.Undocumented("sched_getparam", SchedGetparam),
- 144: syscalls.Undocumented("sched_setscheduler", SchedSetscheduler),
- 145: syscalls.Undocumented("sched_getscheduler", SchedGetscheduler),
- 146: syscalls.Undocumented("sched_get_priority_max", SchedGetPriorityMax),
- 147: syscalls.Undocumented("sched_get_priority_min", SchedGetPriorityMin),
- 148: syscalls.ErrorWithEvent("sched_rr_get_interval", syscall.EPERM, "", nil),
- 149: syscalls.Undocumented("mlock", Mlock),
- 150: syscalls.Undocumented("munlock", Munlock),
- 151: syscalls.Undocumented("mlockall", Mlockall),
- 152: syscalls.Undocumented("munlockall", Munlockall),
+ 143: syscalls.PartiallySupported("sched_getparam", SchedGetparam, "Stub implementation.", nil),
+ 144: syscalls.PartiallySupported("sched_setscheduler", SchedSetscheduler, "Stub implementation.", nil),
+ 145: syscalls.PartiallySupported("sched_getscheduler", SchedGetscheduler, "Stub implementation.", nil),
+ 146: syscalls.PartiallySupported("sched_get_priority_max", SchedGetPriorityMax, "Stub implementation.", nil),
+ 147: syscalls.PartiallySupported("sched_get_priority_min", SchedGetPriorityMin, "Stub implementation.", nil),
+ 148: syscalls.ErrorWithEvent("sched_rr_get_interval", syserror.EPERM, "", nil),
+ 149: syscalls.PartiallySupported("mlock", Mlock, "Stub implementation. The sandbox lacks appropriate permissions.", nil),
+ 150: syscalls.PartiallySupported("munlock", Munlock, "Stub implementation. The sandbox lacks appropriate permissions.", nil),
+ 151: syscalls.PartiallySupported("mlockall", Mlockall, "Stub implementation. The sandbox lacks appropriate permissions.", nil),
+ 152: syscalls.PartiallySupported("munlockall", Munlockall, "Stub implementation. The sandbox lacks appropriate permissions.", nil),
153: syscalls.CapError("vhangup", linux.CAP_SYS_TTY_CONFIG, "", nil),
- 154: syscalls.Error("modify_ldt", syscall.EPERM, "", nil),
- 155: syscalls.Error("pivot_root", syscall.EPERM, "", nil),
- 156: syscalls.Error("sysctl", syscall.EPERM, `syscall is "worthless"`, nil),
- 157: syscalls.Undocumented("prctl", Prctl),
- 158: syscalls.Undocumented("arch_prctl", ArchPrctl),
+ 154: syscalls.Error("modify_ldt", syserror.EPERM, "", nil),
+ 155: syscalls.Error("pivot_root", syserror.EPERM, "", nil),
+ 156: syscalls.Error("sysctl", syserror.EPERM, "Deprecated. Use /proc/sys instead.", nil),
+ 157: syscalls.PartiallySupported("prctl", Prctl, "Not all options are supported.", nil),
+ 158: syscalls.PartiallySupported("arch_prctl", ArchPrctl, "Options ARCH_GET_GS, ARCH_SET_GS not supported.", nil),
159: syscalls.CapError("adjtimex", linux.CAP_SYS_TIME, "", nil),
- 160: syscalls.Undocumented("setrlimit", Setrlimit),
- 161: syscalls.Undocumented("chroot", Chroot),
- 162: syscalls.Undocumented("sync", Sync),
+ 160: syscalls.PartiallySupported("setrlimit", Setrlimit, "Not all rlimits are enforced.", nil),
+ 161: syscalls.Supported("chroot", Chroot),
+ 162: syscalls.PartiallySupported("sync", Sync, "Full data flush is not guaranteed at this time.", nil),
163: syscalls.CapError("acct", linux.CAP_SYS_PACCT, "", nil),
164: syscalls.CapError("settimeofday", linux.CAP_SYS_TIME, "", nil),
- 165: syscalls.Undocumented("mount", Mount),
- 166: syscalls.Undocumented("umount2", Umount2),
+ 165: syscalls.PartiallySupported("mount", Mount, "Not all options or file systems are supported.", nil),
+ 166: syscalls.PartiallySupported("umount2", Umount2, "Not all options or file systems are supported.", nil),
167: syscalls.CapError("swapon", linux.CAP_SYS_ADMIN, "", nil),
168: syscalls.CapError("swapoff", linux.CAP_SYS_ADMIN, "", nil),
169: syscalls.CapError("reboot", linux.CAP_SYS_BOOT, "", nil),
- 170: syscalls.Undocumented("sethostname", Sethostname),
- 171: syscalls.Undocumented("setdomainname", Setdomainname),
+ 170: syscalls.Supported("sethostname", Sethostname),
+ 171: syscalls.Supported("setdomainname", Setdomainname),
172: syscalls.CapError("iopl", linux.CAP_SYS_RAWIO, "", nil),
173: syscalls.CapError("ioperm", linux.CAP_SYS_RAWIO, "", nil),
174: syscalls.CapError("create_module", linux.CAP_SYS_MODULE, "", nil),
175: syscalls.CapError("init_module", linux.CAP_SYS_MODULE, "", nil),
176: syscalls.CapError("delete_module", linux.CAP_SYS_MODULE, "", nil),
- 177: syscalls.Error("get_kernel_syms", syscall.ENOSYS, "Not supported in > 2.6", nil),
- 178: syscalls.Error("query_module", syscall.ENOSYS, "Not supported in > 2.6", nil),
+ 177: syscalls.Error("get_kernel_syms", syserror.ENOSYS, "Not supported in Linux > 2.6.", nil),
+ 178: syscalls.Error("query_module", syserror.ENOSYS, "Not supported in Linux > 2.6.", nil),
179: syscalls.CapError("quotactl", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_admin for most operations
- 180: syscalls.Error("nfsservctl", syscall.ENOSYS, "Does not exist > 3.1", nil),
- 181: syscalls.Error("getpmsg", syscall.ENOSYS, "Not implemented in Linux", nil),
- 182: syscalls.Error("putpmsg", syscall.ENOSYS, "Not implemented in Linux", nil),
- 183: syscalls.Error("afs_syscall", syscall.ENOSYS, "Not implemented in Linux", nil),
- 184: syscalls.Error("tuxcall", syscall.ENOSYS, "Not implemented in Linux", nil),
- 185: syscalls.Error("security", syscall.ENOSYS, "Not implemented in Linux", nil),
- 186: syscalls.Undocumented("gettid", Gettid),
- 187: syscalls.ErrorWithEvent("readahead", syscall.ENOSYS, "", []string{"gvisor.dev/issue/261"}), // TODO(b/29351341)
- 188: syscalls.ErrorWithEvent("setxattr", syscall.ENOTSUP, "Requires filesystem support", nil),
- 189: syscalls.ErrorWithEvent("lsetxattr", syscall.ENOTSUP, "Requires filesystem support", nil),
- 190: syscalls.ErrorWithEvent("fsetxattr", syscall.ENOTSUP, "Requires filesystem support", nil),
- 191: syscalls.ErrorWithEvent("getxattr", syscall.ENOTSUP, "Requires filesystem support", nil),
- 192: syscalls.ErrorWithEvent("lgetxattr", syscall.ENOTSUP, "Requires filesystem support", nil),
- 193: syscalls.ErrorWithEvent("fgetxattr", syscall.ENOTSUP, "Requires filesystem support", nil),
- 194: syscalls.ErrorWithEvent("listxattr", syscall.ENOTSUP, "Requires filesystem support", nil),
- 195: syscalls.ErrorWithEvent("llistxattr", syscall.ENOTSUP, "Requires filesystem support", nil),
- 196: syscalls.ErrorWithEvent("flistxattr", syscall.ENOTSUP, "Requires filesystem support", nil),
- 197: syscalls.ErrorWithEvent("removexattr", syscall.ENOTSUP, "Requires filesystem support", nil),
- 198: syscalls.ErrorWithEvent("lremovexattr", syscall.ENOTSUP, "Requires filesystem support", nil),
- 199: syscalls.ErrorWithEvent("fremovexattr", syscall.ENOTSUP, "Requires filesystem support", nil),
- 200: syscalls.Undocumented("tkill", Tkill),
- 201: syscalls.Undocumented("time", Time),
- 202: syscalls.Undocumented("futex", Futex),
- 203: syscalls.Undocumented("sched_setaffinity", SchedSetaffinity),
- 204: syscalls.Undocumented("sched_getaffinity", SchedGetaffinity),
- 205: syscalls.Error("set_thread_area", syscall.ENOSYS, "Expected to return ENOSYS on 64-bit", nil),
- 206: syscalls.Undocumented("io_setup", IoSetup),
- 207: syscalls.Undocumented("io_destroy", IoDestroy),
- 208: syscalls.Undocumented("io_getevents", IoGetevents),
- 209: syscalls.Undocumented("io_submit", IoSubmit),
- 210: syscalls.Undocumented("io_cancel", IoCancel),
- 211: syscalls.Error("get_thread_area", syscall.ENOSYS, "Expected to return ENOSYS on 64-bit", nil),
+ 180: syscalls.Error("nfsservctl", syserror.ENOSYS, "Removed after Linux 3.1.", nil),
+ 181: syscalls.Error("getpmsg", syserror.ENOSYS, "Not implemented in Linux.", nil),
+ 182: syscalls.Error("putpmsg", syserror.ENOSYS, "Not implemented in Linux.", nil),
+ 183: syscalls.Error("afs_syscall", syserror.ENOSYS, "Not implemented in Linux.", nil),
+ 184: syscalls.Error("tuxcall", syserror.ENOSYS, "Not implemented in Linux.", nil),
+ 185: syscalls.Error("security", syserror.ENOSYS, "Not implemented in Linux.", nil),
+ 186: syscalls.Supported("gettid", Gettid),
+ 187: syscalls.ErrorWithEvent("readahead", syserror.ENOSYS, "", []string{"gvisor.dev/issue/261"}), // TODO(b/29351341)
+ 188: syscalls.Error("setxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
+ 189: syscalls.Error("lsetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
+ 190: syscalls.Error("fsetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
+ 191: syscalls.ErrorWithEvent("getxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
+ 192: syscalls.ErrorWithEvent("lgetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
+ 193: syscalls.ErrorWithEvent("fgetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
+ 194: syscalls.ErrorWithEvent("listxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
+ 195: syscalls.ErrorWithEvent("llistxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
+ 196: syscalls.ErrorWithEvent("flistxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
+ 197: syscalls.ErrorWithEvent("removexattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
+ 198: syscalls.ErrorWithEvent("lremovexattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
+ 199: syscalls.ErrorWithEvent("fremovexattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
+ 200: syscalls.Supported("tkill", Tkill),
+ 201: syscalls.Supported("time", Time),
+ 202: syscalls.PartiallySupported("futex", Futex, "Robust futexes not supported.", nil),
+ 203: syscalls.PartiallySupported("sched_setaffinity", SchedSetaffinity, "Stub implementation.", nil),
+ 204: syscalls.PartiallySupported("sched_getaffinity", SchedGetaffinity, "Stub implementation.", nil),
+ 205: syscalls.Error("set_thread_area", syserror.ENOSYS, "Expected to return ENOSYS on 64-bit", nil),
+ 206: syscalls.PartiallySupported("io_setup", IoSetup, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
+ 207: syscalls.PartiallySupported("io_destroy", IoDestroy, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
+ 208: syscalls.PartiallySupported("io_getevents", IoGetevents, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
+ 209: syscalls.PartiallySupported("io_submit", IoSubmit, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
+ 210: syscalls.PartiallySupported("io_cancel", IoCancel, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
+ 211: syscalls.Error("get_thread_area", syserror.ENOSYS, "Expected to return ENOSYS on 64-bit", nil),
212: syscalls.CapError("lookup_dcookie", linux.CAP_SYS_ADMIN, "", nil),
- 213: syscalls.Undocumented("epoll_create", EpollCreate),
- 214: syscalls.ErrorWithEvent("epoll_ctl_old", syscall.ENOSYS, "Deprecated", nil),
- 215: syscalls.ErrorWithEvent("epoll_wait_old", syscall.ENOSYS, "Deprecated", nil),
- 216: syscalls.ErrorWithEvent("remap_file_pages", syscall.ENOSYS, "Deprecated since 3.16", nil),
- 217: syscalls.Undocumented("getdents64", Getdents64),
- 218: syscalls.Undocumented("set_tid_address", SetTidAddress),
- 219: syscalls.Undocumented("restart_syscall", RestartSyscall),
- 220: syscalls.ErrorWithEvent("semtimedop", syscall.ENOSYS, "", []string{"gvisor.dev/issue/137"}), // TODO(b/29354920)
- 221: syscalls.Undocumented("fadvise64", Fadvise64),
- 222: syscalls.Undocumented("timer_create", TimerCreate),
- 223: syscalls.Undocumented("timer_settime", TimerSettime),
- 224: syscalls.Undocumented("timer_gettime", TimerGettime),
- 225: syscalls.Undocumented("timer_getoverrun", TimerGetoverrun),
- 226: syscalls.Undocumented("timer_delete", TimerDelete),
- 227: syscalls.Undocumented("clock_settime", ClockSettime),
- 228: syscalls.Undocumented("clock_gettime", ClockGettime),
- 229: syscalls.Undocumented("clock_getres", ClockGetres),
- 230: syscalls.Undocumented("clock_nanosleep", ClockNanosleep),
- 231: syscalls.Undocumented("exit_group", ExitGroup),
- 232: syscalls.Undocumented("epoll_wait", EpollWait),
- 233: syscalls.Undocumented("epoll_ctl", EpollCtl),
- 234: syscalls.Undocumented("tgkill", Tgkill),
- 235: syscalls.Undocumented("utimes", Utimes),
- 236: syscalls.Error("vserver", syscall.ENOSYS, "Not implemented by Linux", nil),
+ 213: syscalls.Supported("epoll_create", EpollCreate),
+ 214: syscalls.ErrorWithEvent("epoll_ctl_old", syserror.ENOSYS, "Deprecated.", nil),
+ 215: syscalls.ErrorWithEvent("epoll_wait_old", syserror.ENOSYS, "Deprecated.", nil),
+ 216: syscalls.ErrorWithEvent("remap_file_pages", syserror.ENOSYS, "Deprecated since Linux 3.16.", nil),
+ 217: syscalls.Supported("getdents64", Getdents64),
+ 218: syscalls.Supported("set_tid_address", SetTidAddress),
+ 219: syscalls.Supported("restart_syscall", RestartSyscall),
+ 220: syscalls.ErrorWithEvent("semtimedop", syserror.ENOSYS, "", []string{"gvisor.dev/issue/137"}), // TODO(b/29354920)
+ 221: syscalls.PartiallySupported("fadvise64", Fadvise64, "Not all options are supported.", nil),
+ 222: syscalls.Supported("timer_create", TimerCreate),
+ 223: syscalls.Supported("timer_settime", TimerSettime),
+ 224: syscalls.Supported("timer_gettime", TimerGettime),
+ 225: syscalls.Supported("timer_getoverrun", TimerGetoverrun),
+ 226: syscalls.Supported("timer_delete", TimerDelete),
+ 227: syscalls.Supported("clock_settime", ClockSettime),
+ 228: syscalls.Supported("clock_gettime", ClockGettime),
+ 229: syscalls.Supported("clock_getres", ClockGetres),
+ 230: syscalls.Supported("clock_nanosleep", ClockNanosleep),
+ 231: syscalls.Supported("exit_group", ExitGroup),
+ 232: syscalls.Supported("epoll_wait", EpollWait),
+ 233: syscalls.Supported("epoll_ctl", EpollCtl),
+ 234: syscalls.Supported("tgkill", Tgkill),
+ 235: syscalls.Supported("utimes", Utimes),
+ 236: syscalls.Error("vserver", syserror.ENOSYS, "Not implemented by Linux", nil),
237: syscalls.PartiallySupported("mbind", Mbind, "Stub implementation. Only a single NUMA node is advertised, and mempolicy is ignored accordingly, but mbind() will succeed and has effects reflected by get_mempolicy.", []string{"gvisor.dev/issue/262"}),
- 238: syscalls.Undocumented("set_mempolicy", SetMempolicy),
- 239: syscalls.Undocumented("get_mempolicy", GetMempolicy),
- 240: syscalls.ErrorWithEvent("mq_open", syscall.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
- 241: syscalls.ErrorWithEvent("mq_unlink", syscall.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
- 242: syscalls.ErrorWithEvent("mq_timedsend", syscall.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
- 243: syscalls.ErrorWithEvent("mq_timedreceive", syscall.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
- 244: syscalls.ErrorWithEvent("mq_notify", syscall.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
- 245: syscalls.ErrorWithEvent("mq_getsetattr", syscall.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
+ 238: syscalls.PartiallySupported("set_mempolicy", SetMempolicy, "Stub implementation.", nil),
+ 239: syscalls.PartiallySupported("get_mempolicy", GetMempolicy, "Stub implementation.", nil),
+ 240: syscalls.ErrorWithEvent("mq_open", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
+ 241: syscalls.ErrorWithEvent("mq_unlink", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
+ 242: syscalls.ErrorWithEvent("mq_timedsend", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
+ 243: syscalls.ErrorWithEvent("mq_timedreceive", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
+ 244: syscalls.ErrorWithEvent("mq_notify", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
+ 245: syscalls.ErrorWithEvent("mq_getsetattr", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
246: syscalls.CapError("kexec_load", linux.CAP_SYS_BOOT, "", nil),
- 247: syscalls.Undocumented("waitid", Waitid),
- 248: syscalls.Error("add_key", syscall.EACCES, "Not available to user", nil),
- 249: syscalls.Error("request_key", syscall.EACCES, "Not available to user", nil),
- 250: syscalls.Error("keyctl", syscall.EACCES, "Not available to user", nil),
+ 247: syscalls.Supported("waitid", Waitid),
+ 248: syscalls.Error("add_key", syserror.EACCES, "Not available to user.", nil),
+ 249: syscalls.Error("request_key", syserror.EACCES, "Not available to user.", nil),
+ 250: syscalls.Error("keyctl", syserror.EACCES, "Not available to user.", nil),
251: syscalls.CapError("ioprio_set", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_nice or cap_sys_admin (depending)
252: syscalls.CapError("ioprio_get", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_nice or cap_sys_admin (depending)
- 253: syscalls.Undocumented("inotify_init", InotifyInit),
- 254: syscalls.Undocumented("inotify_add_watch", InotifyAddWatch),
- 255: syscalls.Undocumented("inotify_rm_watch", InotifyRmWatch),
+ 253: syscalls.PartiallySupported("inotify_init", InotifyInit, "inotify events are only available inside the sandbox.", nil),
+ 254: syscalls.PartiallySupported("inotify_add_watch", InotifyAddWatch, "inotify events are only available inside the sandbox.", nil),
+ 255: syscalls.PartiallySupported("inotify_rm_watch", InotifyRmWatch, "inotify events are only available inside the sandbox.", nil),
256: syscalls.CapError("migrate_pages", linux.CAP_SYS_NICE, "", nil),
- 257: syscalls.Undocumented("openat", Openat),
- 258: syscalls.Undocumented("mkdirat", Mkdirat),
- 259: syscalls.Undocumented("mknodat", Mknodat),
- 260: syscalls.Undocumented("fchownat", Fchownat),
- 261: syscalls.Undocumented("futimesat", Futimesat),
- 262: syscalls.Undocumented("fstatat", Fstatat),
- 263: syscalls.Undocumented("unlinkat", Unlinkat),
- 264: syscalls.Undocumented("renameat", Renameat),
- 265: syscalls.Undocumented("linkat", Linkat),
- 266: syscalls.Undocumented("symlinkat", Symlinkat),
- 267: syscalls.Undocumented("readlinkat", Readlinkat),
- 268: syscalls.Undocumented("fchmodat", Fchmodat),
- 269: syscalls.Undocumented("faccessat", Faccessat),
- 270: syscalls.Undocumented("pselect", Pselect),
- 271: syscalls.Undocumented("ppoll", Ppoll),
- 272: syscalls.Undocumented("unshare", Unshare),
- 273: syscalls.Error("set_robust_list", syscall.ENOSYS, "Obsolete", nil),
- 274: syscalls.Error("get_robust_list", syscall.ENOSYS, "Obsolete", nil),
- 275: syscalls.PartiallySupported("splice", Splice, "Stub implementation", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098)
- 276: syscalls.ErrorWithEvent("tee", syscall.ENOSYS, "", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098)
- 277: syscalls.Undocumented("sync_file_range", SyncFileRange),
- 278: syscalls.ErrorWithEvent("vmsplice", syscall.ENOSYS, "", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098)
- 279: syscalls.CapError("move_pages", linux.CAP_SYS_NICE, "", nil), // requires cap_sys_nice (mostly)
- 280: syscalls.Undocumented("utimensat", Utimensat),
- 281: syscalls.Undocumented("epoll_pwait", EpollPwait),
- 282: syscalls.ErrorWithEvent("signalfd", syscall.ENOSYS, "", []string{"gvisor.dev/issue/139"}), // TODO(b/19846426)
- 283: syscalls.Undocumented("timerfd_create", TimerfdCreate),
- 284: syscalls.Undocumented("eventfd", Eventfd),
- 285: syscalls.Undocumented("fallocate", Fallocate),
- 286: syscalls.Undocumented("timerfd_settime", TimerfdSettime),
- 287: syscalls.Undocumented("timerfd_gettime", TimerfdGettime),
- 288: syscalls.Undocumented("accept4", Accept4),
- 289: syscalls.ErrorWithEvent("signalfd4", syscall.ENOSYS, "", []string{"gvisor.dev/issue/139"}), // TODO(b/19846426)
- 290: syscalls.Undocumented("eventfd2", Eventfd2),
- 291: syscalls.Undocumented("epoll_create1", EpollCreate1),
- 292: syscalls.Undocumented("dup3", Dup3),
- 293: syscalls.Undocumented("pipe2", Pipe2),
- 294: syscalls.Undocumented("inotify_init1", InotifyInit1),
- 295: syscalls.Undocumented("preadv", Preadv),
- 296: syscalls.Undocumented("pwritev", Pwritev),
- 297: syscalls.Undocumented("rt_tgsigqueueinfo", RtTgsigqueueinfo),
- 298: syscalls.ErrorWithEvent("perf_event_open", syscall.ENODEV, "No support for perf counters", nil),
- 299: syscalls.Undocumented("recvmmsg", RecvMMsg),
- 300: syscalls.ErrorWithEvent("fanotify_init", syscall.ENOSYS, "Needs CONFIG_FANOTIFY", nil),
- 301: syscalls.ErrorWithEvent("fanotify_mark", syscall.ENOSYS, "Needs CONFIG_FANOTIFY", nil),
- 302: syscalls.Undocumented("prlimit64", Prlimit64),
- 303: syscalls.ErrorWithEvent("name_to_handle_at", syscall.EOPNOTSUPP, "Needs filesystem support", nil),
- 304: syscalls.ErrorWithEvent("open_by_handle_at", syscall.EOPNOTSUPP, "Needs filesystem support", nil),
+ 257: syscalls.Supported("openat", Openat),
+ 258: syscalls.Supported("mkdirat", Mkdirat),
+ 259: syscalls.Supported("mknodat", Mknodat),
+ 260: syscalls.Supported("fchownat", Fchownat),
+ 261: syscalls.Supported("futimesat", Futimesat),
+ 262: syscalls.Supported("fstatat", Fstatat),
+ 263: syscalls.Supported("unlinkat", Unlinkat),
+ 264: syscalls.Supported("renameat", Renameat),
+ 265: syscalls.Supported("linkat", Linkat),
+ 266: syscalls.Supported("symlinkat", Symlinkat),
+ 267: syscalls.Supported("readlinkat", Readlinkat),
+ 268: syscalls.Supported("fchmodat", Fchmodat),
+ 269: syscalls.Supported("faccessat", Faccessat),
+ 270: syscalls.Supported("pselect", Pselect),
+ 271: syscalls.Supported("ppoll", Ppoll),
+ 272: syscalls.PartiallySupported("unshare", Unshare, "Mount, cgroup namespaces not supported. Network namespaces supported but must be empty.", nil),
+ 273: syscalls.Error("set_robust_list", syserror.ENOSYS, "Obsolete.", nil),
+ 274: syscalls.Error("get_robust_list", syserror.ENOSYS, "Obsolete.", nil),
+ 275: syscalls.PartiallySupported("splice", Splice, "Stub implementation.", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098)
+ 276: syscalls.ErrorWithEvent("tee", syserror.ENOSYS, "", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098)
+ 277: syscalls.PartiallySupported("sync_file_range", SyncFileRange, "Full data flush is not guaranteed at this time.", nil),
+ 278: syscalls.ErrorWithEvent("vmsplice", syserror.ENOSYS, "", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098)
+ 279: syscalls.CapError("move_pages", linux.CAP_SYS_NICE, "", nil), // requires cap_sys_nice (mostly)
+ 280: syscalls.Supported("utimensat", Utimensat),
+ 281: syscalls.Supported("epoll_pwait", EpollPwait),
+ 282: syscalls.ErrorWithEvent("signalfd", syserror.ENOSYS, "", []string{"gvisor.dev/issue/139"}), // TODO(b/19846426)
+ 283: syscalls.Supported("timerfd_create", TimerfdCreate),
+ 284: syscalls.Supported("eventfd", Eventfd),
+ 285: syscalls.PartiallySupported("fallocate", Fallocate, "Not all options are supported.", nil),
+ 286: syscalls.Supported("timerfd_settime", TimerfdSettime),
+ 287: syscalls.Supported("timerfd_gettime", TimerfdGettime),
+ 288: syscalls.Supported("accept4", Accept4),
+ 289: syscalls.ErrorWithEvent("signalfd4", syserror.ENOSYS, "", []string{"gvisor.dev/issue/139"}), // TODO(b/19846426)
+ 290: syscalls.Supported("eventfd2", Eventfd2),
+ 291: syscalls.Supported("epoll_create1", EpollCreate1),
+ 292: syscalls.Supported("dup3", Dup3),
+ 293: syscalls.Supported("pipe2", Pipe2),
+ 294: syscalls.Supported("inotify_init1", InotifyInit1),
+ 295: syscalls.Supported("preadv", Preadv),
+ 296: syscalls.Supported("pwritev", Pwritev),
+ 297: syscalls.Supported("rt_tgsigqueueinfo", RtTgsigqueueinfo),
+ 298: syscalls.ErrorWithEvent("perf_event_open", syserror.ENODEV, "No support for perf counters", nil),
+ 299: syscalls.PartiallySupported("recvmmsg", RecvMMsg, "Not all flags and control messages are supported.", nil),
+ 300: syscalls.ErrorWithEvent("fanotify_init", syserror.ENOSYS, "Needs CONFIG_FANOTIFY", nil),
+ 301: syscalls.ErrorWithEvent("fanotify_mark", syserror.ENOSYS, "Needs CONFIG_FANOTIFY", nil),
+ 302: syscalls.Supported("prlimit64", Prlimit64),
+ 303: syscalls.Error("name_to_handle_at", syserror.EOPNOTSUPP, "Not supported by gVisor filesystems", nil),
+ 304: syscalls.Error("open_by_handle_at", syserror.EOPNOTSUPP, "Not supported by gVisor filesystems", nil),
305: syscalls.CapError("clock_adjtime", linux.CAP_SYS_TIME, "", nil),
- 306: syscalls.Undocumented("syncfs", Syncfs),
- 307: syscalls.Undocumented("sendmmsg", SendMMsg),
- 308: syscalls.ErrorWithEvent("setns", syscall.EOPNOTSUPP, "Needs filesystem support", []string{"gvisor.dev/issue/140"}), // TODO(b/29354995)
- 309: syscalls.Undocumented("getcpu", Getcpu),
- 310: syscalls.ErrorWithEvent("process_vm_readv", syscall.ENOSYS, "", []string{"gvisor.dev/issue/158"}),
- 311: syscalls.ErrorWithEvent("process_vm_writev", syscall.ENOSYS, "", []string{"gvisor.dev/issue/158"}),
+ 306: syscalls.PartiallySupported("syncfs", Syncfs, "Depends on backing file system.", nil),
+ 307: syscalls.PartiallySupported("sendmmsg", SendMMsg, "Not all flags and control messages are supported.", nil),
+ 308: syscalls.ErrorWithEvent("setns", syserror.EOPNOTSUPP, "Needs filesystem support", []string{"gvisor.dev/issue/140"}), // TODO(b/29354995)
+ 309: syscalls.Supported("getcpu", Getcpu),
+ 310: syscalls.ErrorWithEvent("process_vm_readv", syserror.ENOSYS, "", []string{"gvisor.dev/issue/158"}),
+ 311: syscalls.ErrorWithEvent("process_vm_writev", syserror.ENOSYS, "", []string{"gvisor.dev/issue/158"}),
312: syscalls.CapError("kcmp", linux.CAP_SYS_PTRACE, "", nil),
313: syscalls.CapError("finit_module", linux.CAP_SYS_MODULE, "", nil),
- 314: syscalls.ErrorWithEvent("sched_setattr", syscall.ENOSYS, "gVisor does not implement a scheduler.", []string{"gvisor.dev/issue/264"}), // TODO(b/118902272)
- 315: syscalls.ErrorWithEvent("sched_getattr", syscall.ENOSYS, "gVisor does not implement a scheduler.", []string{"gvisor.dev/issue/264"}), // TODO(b/118902272)
- 316: syscalls.ErrorWithEvent("renameat2", syscall.ENOSYS, "", []string{"gvisor.dev/issue/263"}), // TODO(b/118902772)
- 317: syscalls.Undocumented("seccomp", Seccomp),
- 318: syscalls.Undocumented("getrandom", GetRandom),
- 319: syscalls.Undocumented("memfd_create", MemfdCreate),
+ 314: syscalls.ErrorWithEvent("sched_setattr", syserror.ENOSYS, "gVisor does not implement a scheduler.", []string{"gvisor.dev/issue/264"}), // TODO(b/118902272)
+ 315: syscalls.ErrorWithEvent("sched_getattr", syserror.ENOSYS, "gVisor does not implement a scheduler.", []string{"gvisor.dev/issue/264"}), // TODO(b/118902272)
+ 316: syscalls.ErrorWithEvent("renameat2", syserror.ENOSYS, "", []string{"gvisor.dev/issue/263"}), // TODO(b/118902772)
+ 317: syscalls.Supported("seccomp", Seccomp),
+ 318: syscalls.Supported("getrandom", GetRandom),
+ 319: syscalls.Supported("memfd_create", MemfdCreate),
320: syscalls.CapError("kexec_file_load", linux.CAP_SYS_BOOT, "", nil),
321: syscalls.CapError("bpf", linux.CAP_SYS_ADMIN, "", nil),
- 322: syscalls.ErrorWithEvent("execveat", syscall.ENOSYS, "", []string{"gvisor.dev/issue/265"}), // TODO(b/118901836)
- 323: syscalls.ErrorWithEvent("userfaultfd", syscall.ENOSYS, "", []string{"gvisor.dev/issue/266"}), // TODO(b/118906345)
- 324: syscalls.ErrorWithEvent("membarrier", syscall.ENOSYS, "", []string{"gvisor.dev/issue/267"}), // TODO(b/118904897)
- 325: syscalls.Undocumented("mlock2", Mlock2),
+ 322: syscalls.ErrorWithEvent("execveat", syserror.ENOSYS, "", []string{"gvisor.dev/issue/265"}), // TODO(b/118901836)
+ 323: syscalls.ErrorWithEvent("userfaultfd", syserror.ENOSYS, "", []string{"gvisor.dev/issue/266"}), // TODO(b/118906345)
+ 324: syscalls.ErrorWithEvent("membarrier", syserror.ENOSYS, "", []string{"gvisor.dev/issue/267"}), // TODO(b/118904897)
+ 325: syscalls.PartiallySupported("mlock2", Mlock2, "Stub implementation. The sandbox lacks appropriate permissions.", nil),
// Syscalls after 325 are "backports" from versions of Linux after 4.4.
- 326: syscalls.ErrorWithEvent("copy_file_range", syscall.ENOSYS, "", nil),
- 327: syscalls.Undocumented("preadv2", Preadv2),
- 328: syscalls.Undocumented("pwritev2", Pwritev2),
+ 326: syscalls.ErrorWithEvent("copy_file_range", syserror.ENOSYS, "", nil),
+ 327: syscalls.Supported("preadv2", Preadv2),
+ 328: syscalls.PartiallySupported("pwritev2", Pwritev2, "Flag RWF_HIPRI is not supported.", nil),
+ 332: syscalls.Supported("statx", Statx),
},
Emulate: map[usermem.Addr]uintptr{
diff --git a/pkg/sentry/syscalls/linux/sigset.go b/pkg/sentry/syscalls/linux/sigset.go
index 5438b664b..333013d8c 100644
--- a/pkg/sentry/syscalls/linux/sigset.go
+++ b/pkg/sentry/syscalls/linux/sigset.go
@@ -15,19 +15,17 @@
package linux
import (
- "syscall"
-
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// copyInSigSet copies in a sigset_t, checks its size, and ensures that KILL and
// STOP are clear.
func copyInSigSet(t *kernel.Task, sigSetAddr usermem.Addr, size uint) (linux.SignalSet, error) {
if size != linux.SignalSetSize {
- return 0, syscall.EINVAL
+ return 0, syserror.EINVAL
}
b := t.CopyScratchBuffer(8)
if _, err := t.CopyInBytes(sigSetAddr, b); err != nil {
diff --git a/pkg/sentry/syscalls/linux/sys_aio.go b/pkg/sentry/syscalls/linux/sys_aio.go
index 1b27b2415..f56411bfe 100644
--- a/pkg/sentry/syscalls/linux/sys_aio.go
+++ b/pkg/sentry/syscalls/linux/sys_aio.go
@@ -17,15 +17,14 @@ package linux
import (
"encoding/binary"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/eventfd"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/mm"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/eventfd"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/mm"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// I/O commands.
@@ -55,7 +54,7 @@ type ioCallback struct {
OpCode uint16
ReqPrio int16
- FD uint32
+ FD int32
Buf uint64
Bytes uint64
@@ -65,7 +64,7 @@ type ioCallback struct {
Flags uint32
// eventfd to signal if IOCB_FLAG_RESFD is set in flags.
- ResFD uint32
+ ResFD int32
}
// ioEvent describes an I/O result.
@@ -292,7 +291,7 @@ func performCallback(t *kernel.Task, file *fs.File, cbAddr usermem.Addr, cb *ioC
// submitCallback processes a single callback.
func submitCallback(t *kernel.Task, id uint64, cb *ioCallback, cbAddr usermem.Addr) error {
- file := t.FDMap().GetFile(kdefs.FD(cb.FD))
+ file := t.GetFile(cb.FD)
if file == nil {
// File not found.
return syserror.EBADF
@@ -302,7 +301,7 @@ func submitCallback(t *kernel.Task, id uint64, cb *ioCallback, cbAddr usermem.Ad
// Was there an eventFD? Extract it.
var eventFile *fs.File
if cb.Flags&_IOCB_FLAG_RESFD != 0 {
- eventFile = t.FDMap().GetFile(kdefs.FD(cb.ResFD))
+ eventFile = t.GetFile(cb.ResFD)
if eventFile == nil {
// Bad FD.
return syserror.EBADF
diff --git a/pkg/sentry/syscalls/linux/sys_capability.go b/pkg/sentry/syscalls/linux/sys_capability.go
index 622cb8d0d..adf5ea5f2 100644
--- a/pkg/sentry/syscalls/linux/sys_capability.go
+++ b/pkg/sentry/syscalls/linux/sys_capability.go
@@ -15,11 +15,11 @@
package linux
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/syserror"
)
func lookupCaps(t *kernel.Task, tid kernel.ThreadID) (permitted, inheritable, effective auth.CapabilitySet, err error) {
diff --git a/pkg/sentry/syscalls/linux/sys_epoll.go b/pkg/sentry/syscalls/linux/sys_epoll.go
index 1467feb4e..4a2b9f061 100644
--- a/pkg/sentry/syscalls/linux/sys_epoll.go
+++ b/pkg/sentry/syscalls/linux/sys_epoll.go
@@ -15,26 +15,24 @@
package linux
import (
- "syscall"
-
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/epoll"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/syscalls"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/epoll"
+ "gvisor.dev/gvisor/pkg/sentry/syscalls"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// EpollCreate1 implements the epoll_create1(2) linux syscall.
func EpollCreate1(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
flags := args[0].Int()
- if flags & ^syscall.EPOLL_CLOEXEC != 0 {
+ if flags & ^linux.EPOLL_CLOEXEC != 0 {
return 0, nil, syserror.EINVAL
}
- closeOnExec := flags&syscall.EPOLL_CLOEXEC != 0
+ closeOnExec := flags&linux.EPOLL_CLOEXEC != 0
fd, err := syscalls.CreateEpoll(t, closeOnExec)
if err != nil {
return 0, nil, err
@@ -61,46 +59,43 @@ func EpollCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S
// EpollCtl implements the epoll_ctl(2) linux syscall.
func EpollCtl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- epfd := kdefs.FD(args[0].Int())
+ epfd := args[0].Int()
op := args[1].Int()
- fd := kdefs.FD(args[2].Int())
+ fd := args[2].Int()
eventAddr := args[3].Pointer()
// Capture the event state if needed.
flags := epoll.EntryFlags(0)
mask := waiter.EventMask(0)
var data [2]int32
- if op != syscall.EPOLL_CTL_DEL {
- var e syscall.EpollEvent
+ if op != linux.EPOLL_CTL_DEL {
+ var e linux.EpollEvent
if _, err := t.CopyIn(eventAddr, &e); err != nil {
return 0, nil, err
}
- if e.Events&syscall.EPOLLONESHOT != 0 {
+ if e.Events&linux.EPOLLONESHOT != 0 {
flags |= epoll.OneShot
}
- // syscall.EPOLLET is incorrectly generated as a negative number
- // in Go, see https://github.com/golang/go/issues/5328 for
- // details.
- if e.Events&-syscall.EPOLLET != 0 {
+ if e.Events&linux.EPOLLET != 0 {
flags |= epoll.EdgeTriggered
}
mask = waiter.EventMaskFromLinux(e.Events)
data[0] = e.Fd
- data[1] = e.Pad
+ data[1] = e.Data
}
// Perform the requested operations.
switch op {
- case syscall.EPOLL_CTL_ADD:
+ case linux.EPOLL_CTL_ADD:
// See fs/eventpoll.c.
mask |= waiter.EventHUp | waiter.EventErr
return 0, nil, syscalls.AddEpoll(t, epfd, fd, flags, mask, data)
- case syscall.EPOLL_CTL_DEL:
+ case linux.EPOLL_CTL_DEL:
return 0, nil, syscalls.RemoveEpoll(t, epfd, fd)
- case syscall.EPOLL_CTL_MOD:
+ case linux.EPOLL_CTL_MOD:
// Same as EPOLL_CTL_ADD.
mask |= waiter.EventHUp | waiter.EventErr
return 0, nil, syscalls.UpdateEpoll(t, epfd, fd, flags, mask, data)
@@ -132,7 +127,7 @@ func copyOutEvents(t *kernel.Task, addr usermem.Addr, e []epoll.Event) error {
// EpollWait implements the epoll_wait(2) linux syscall.
func EpollWait(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- epfd := kdefs.FD(args[0].Int())
+ epfd := args[0].Int()
eventsAddr := args[1].Pointer()
maxEvents := int(args[2].Int())
timeout := int(args[3].Int())
diff --git a/pkg/sentry/syscalls/linux/sys_eventfd.go b/pkg/sentry/syscalls/linux/sys_eventfd.go
index ca4ead488..8a34c4e99 100644
--- a/pkg/sentry/syscalls/linux/sys_eventfd.go
+++ b/pkg/sentry/syscalls/linux/sys_eventfd.go
@@ -15,12 +15,11 @@
package linux
import (
- "syscall"
-
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/eventfd"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/eventfd"
+ "gvisor.dev/gvisor/pkg/syserror"
)
const (
@@ -38,7 +37,7 @@ func Eventfd2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
allOps := uint(EFD_SEMAPHORE | EFD_NONBLOCK | EFD_CLOEXEC)
if flags & ^allOps != 0 {
- return 0, nil, syscall.EINVAL
+ return 0, nil, syserror.EINVAL
}
event := eventfd.New(t, uint64(initVal), flags&EFD_SEMAPHORE != 0)
@@ -47,10 +46,9 @@ func Eventfd2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
})
defer event.DecRef()
- fd, err := t.FDMap().NewFDFrom(0, event, kernel.FDFlags{
+ fd, err := t.NewFDFrom(0, event, kernel.FDFlags{
CloseOnExec: flags&EFD_CLOEXEC != 0,
- },
- t.ThreadGroup().Limits())
+ })
if err != nil {
return 0, nil, err
}
diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go
index 19f579930..2e00a91ce 100644
--- a/pkg/sentry/syscalls/linux/sys_file.go
+++ b/pkg/sentry/syscalls/linux/sys_file.go
@@ -17,31 +17,29 @@ package linux
import (
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/lock"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/tmpfs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/fasync"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/pipe"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/limits"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/lock"
+ "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/fasync"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/limits"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// fileOpAt performs an operation on the second last component in the path.
-func fileOpAt(t *kernel.Task, dirFD kdefs.FD, path string, fn func(root *fs.Dirent, d *fs.Dirent, name string) error) error {
+func fileOpAt(t *kernel.Task, dirFD int32, path string, fn func(root *fs.Dirent, d *fs.Dirent, name string, remainingTraversals uint) error) error {
// Extract the last component.
dir, name := fs.SplitLast(path)
if dir == "/" {
// Common case: we are accessing a file in the root.
root := t.FSContext().RootDirectory()
- err := fn(root, root, name)
+ err := fn(root, root, name, linux.MaxSymlinkTraversals)
root.DecRef()
return err
} else if dir == "." && dirFD == linux.AT_FDCWD {
@@ -49,19 +47,19 @@ func fileOpAt(t *kernel.Task, dirFD kdefs.FD, path string, fn func(root *fs.Dire
// working directory; skip the look-up.
wd := t.FSContext().WorkingDirectory()
root := t.FSContext().RootDirectory()
- err := fn(root, wd, name)
+ err := fn(root, wd, name, linux.MaxSymlinkTraversals)
wd.DecRef()
root.DecRef()
return err
}
- return fileOpOn(t, dirFD, dir, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error {
- return fn(root, d, name)
+ return fileOpOn(t, dirFD, dir, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, remainingTraversals uint) error {
+ return fn(root, d, name, remainingTraversals)
})
}
// fileOpOn performs an operation on the last entry of the path.
-func fileOpOn(t *kernel.Task, dirFD kdefs.FD, path string, resolve bool, fn func(root *fs.Dirent, d *fs.Dirent) error) error {
+func fileOpOn(t *kernel.Task, dirFD int32, path string, resolve bool, fn func(root *fs.Dirent, d *fs.Dirent, remainingTraversals uint) error) error {
var (
d *fs.Dirent // The file.
wd *fs.Dirent // The working directory (if required.)
@@ -79,7 +77,7 @@ func fileOpOn(t *kernel.Task, dirFD kdefs.FD, path string, resolve bool, fn func
rel = wd
} else {
// Need to extract the given FD.
- f = t.FDMap().GetFile(dirFD)
+ f = t.GetFile(dirFD)
if f == nil {
return syserror.EBADF
}
@@ -110,7 +108,7 @@ func fileOpOn(t *kernel.Task, dirFD kdefs.FD, path string, resolve bool, fn func
return err
}
- err = fn(root, d)
+ err = fn(root, d, remainingTraversals)
d.DecRef()
return err
}
@@ -132,14 +130,14 @@ func copyInPath(t *kernel.Task, addr usermem.Addr, allowEmpty bool) (path string
return path, dirPath, nil
}
-func openAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, flags uint) (fd uintptr, err error) {
+func openAt(t *kernel.Task, dirFD int32, addr usermem.Addr, flags uint) (fd uintptr, err error) {
path, dirPath, err := copyInPath(t, addr, false /* allowEmpty */)
if err != nil {
return 0, err
}
resolve := flags&linux.O_NOFOLLOW == 0
- err = fileOpOn(t, dirFD, path, resolve, func(root *fs.Dirent, d *fs.Dirent) error {
+ err = fileOpOn(t, dirFD, path, resolve, func(root *fs.Dirent, d *fs.Dirent, _ uint) error {
// First check a few things about the filesystem before trying to get the file
// reference.
//
@@ -185,8 +183,9 @@ func openAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, flags uint) (fd u
defer file.DecRef()
// Success.
- fdFlags := kernel.FDFlags{CloseOnExec: flags&linux.O_CLOEXEC != 0}
- newFD, err := t.FDMap().NewFDFrom(0, file, fdFlags, t.ThreadGroup().Limits())
+ newFD, err := t.NewFDFrom(0, file, kernel.FDFlags{
+ CloseOnExec: flags&linux.O_CLOEXEC != 0,
+ })
if err != nil {
return err
}
@@ -202,7 +201,7 @@ func openAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, flags uint) (fd u
return fd, err // Use result in frame.
}
-func mknodAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, mode linux.FileMode) error {
+func mknodAt(t *kernel.Task, dirFD int32, addr usermem.Addr, mode linux.FileMode) error {
path, dirPath, err := copyInPath(t, addr, false /* allowEmpty */)
if err != nil {
return err
@@ -211,7 +210,7 @@ func mknodAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, mode linux.FileM
return syserror.ENOENT
}
- return fileOpAt(t, dirFD, path, func(root *fs.Dirent, d *fs.Dirent, name string) error {
+ return fileOpAt(t, dirFD, path, func(root *fs.Dirent, d *fs.Dirent, name string, _ uint) error {
if !fs.IsDir(d.Inode.StableAttr) {
return syserror.ENOTDIR
}
@@ -286,7 +285,7 @@ func Mknod(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
// Mknodat implements the linux syscall mknodat(2).
func Mknodat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- dirFD := kdefs.FD(args[0].Int())
+ dirFD := args[0].Int()
path := args[1].Pointer()
mode := linux.FileMode(args[2].ModeT())
// We don't need this argument until we support creation of device nodes.
@@ -295,7 +294,7 @@ func Mknodat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
return 0, nil, mknodAt(t, dirFD, path, mode)
}
-func createAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, flags uint, mode linux.FileMode) (fd uintptr, err error) {
+func createAt(t *kernel.Task, dirFD int32, addr usermem.Addr, flags uint, mode linux.FileMode) (fd uintptr, err error) {
path, dirPath, err := copyInPath(t, addr, false /* allowEmpty */)
if err != nil {
return 0, err
@@ -304,45 +303,108 @@ func createAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, flags uint, mod
return 0, syserror.ENOENT
}
- err = fileOpAt(t, dirFD, path, func(root *fs.Dirent, d *fs.Dirent, name string) error {
- if !fs.IsDir(d.Inode.StableAttr) {
- return syserror.ENOTDIR
- }
-
- fileFlags := linuxToFlags(flags)
- // Linux always adds the O_LARGEFILE flag when running in 64-bit mode.
- fileFlags.LargeFile = true
+ fileFlags := linuxToFlags(flags)
+ // Linux always adds the O_LARGEFILE flag when running in 64-bit mode.
+ fileFlags.LargeFile = true
+
+ err = fileOpAt(t, dirFD, path, func(root *fs.Dirent, parent *fs.Dirent, name string, remainingTraversals uint) error {
+ // Resolve the name to see if it exists, and follow any
+ // symlinks along the way. We must do the symlink resolution
+ // manually because if the symlink target does not exist, we
+ // must create the target (and not the symlink itself).
+ var (
+ found *fs.Dirent
+ err error
+ )
+ for {
+ if !fs.IsDir(parent.Inode.StableAttr) {
+ return syserror.ENOTDIR
+ }
- // Does this file exist already?
- remainingTraversals := uint(linux.MaxSymlinkTraversals)
- targetDirent, err := t.MountNamespace().FindInode(t, root, d, name, &remainingTraversals)
- var newFile *fs.File
- switch err {
- case nil:
- // The file existed.
- defer targetDirent.DecRef()
+ // Start by looking up the dirent at 'name'.
+ found, err = t.MountNamespace().FindLink(t, root, parent, name, &remainingTraversals)
+ if err != nil {
+ break
+ }
+ defer found.DecRef()
- // Check if we wanted to create.
+ // We found something (possibly a symlink). If the
+ // O_EXCL flag was passed, then we can immediately
+ // return EEXIST.
if flags&linux.O_EXCL != 0 {
return syserror.EEXIST
}
+ // If we have a non-symlink, then we can proceed.
+ if !fs.IsSymlink(found.Inode.StableAttr) {
+ break
+ }
+
+ // If O_NOFOLLOW was passed, then don't try to resolve
+ // anything.
+ if flags&linux.O_NOFOLLOW != 0 {
+ return syserror.ELOOP
+ }
+
+ // Try to resolve the symlink directly to a Dirent.
+ var resolved *fs.Dirent
+ resolved, err = found.Inode.Getlink(t)
+ if err == nil {
+ // No more resolution necessary.
+ defer resolved.DecRef()
+ break
+ }
+ if err != fs.ErrResolveViaReadlink {
+ return err
+ }
+
+ // Are we able to resolve further?
+ if remainingTraversals == 0 {
+ return syscall.ELOOP
+ }
+
+ // Resolve the symlink to a path via Readlink.
+ var path string
+ path, err = found.Inode.Readlink(t)
+ if err != nil {
+ break
+ }
+ remainingTraversals--
+
+ // Get the new parent from the target path.
+ var newParent *fs.Dirent
+ newParentPath, newName := fs.SplitLast(path)
+ newParent, err = t.MountNamespace().FindInode(t, root, parent, newParentPath, &remainingTraversals)
+ if err != nil {
+ break
+ }
+ defer newParent.DecRef()
+
+ // Repeat the process with the parent and name of the
+ // symlink target.
+ parent = newParent
+ name = newName
+ }
+
+ var newFile *fs.File
+ switch err {
+ case nil:
// Like sys_open, check for a few things about the
// filesystem before trying to get a reference to the
// fs.File. The same constraints on Check apply.
- if err := targetDirent.Inode.CheckPermission(t, flagsToPermissions(flags)); err != nil {
+ if err := found.Inode.CheckPermission(t, flagsToPermissions(flags)); err != nil {
return err
}
// Should we truncate the file?
if flags&linux.O_TRUNC != 0 {
- if err := targetDirent.Inode.Truncate(t, targetDirent, 0); err != nil {
+ if err := found.Inode.Truncate(t, found, 0); err != nil {
return err
}
}
// Create a new fs.File.
- newFile, err = targetDirent.Inode.GetFile(t, targetDirent, fileFlags)
+ newFile, err = found.Inode.GetFile(t, found, fileFlags)
if err != nil {
return syserror.ConvertIntr(err, kernel.ERESTARTSYS)
}
@@ -351,26 +413,27 @@ func createAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, flags uint, mod
// File does not exist. Proceed with creation.
// Do we have write permissions on the parent?
- if err := d.Inode.CheckPermission(t, fs.PermMask{Write: true, Execute: true}); err != nil {
+ if err := parent.Inode.CheckPermission(t, fs.PermMask{Write: true, Execute: true}); err != nil {
return err
}
// Attempt a creation.
perms := fs.FilePermsFromMode(mode &^ linux.FileMode(t.FSContext().Umask()))
- newFile, err = d.Create(t, root, name, fileFlags, perms)
+ newFile, err = parent.Create(t, root, name, fileFlags, perms)
if err != nil {
// No luck, bail.
return err
}
defer newFile.DecRef()
- targetDirent = newFile.Dirent
+ found = newFile.Dirent
default:
return err
}
// Success.
- fdFlags := kernel.FDFlags{CloseOnExec: flags&linux.O_CLOEXEC != 0}
- newFD, err := t.FDMap().NewFDFrom(0, newFile, fdFlags, t.ThreadGroup().Limits())
+ newFD, err := t.NewFDFrom(0, newFile, kernel.FDFlags{
+ CloseOnExec: flags&linux.O_CLOEXEC != 0,
+ })
if err != nil {
return err
}
@@ -379,10 +442,10 @@ func createAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, flags uint, mod
fd = uintptr(newFD)
// Queue the open inotify event. The creation event is
- // automatically queued when the dirent is targetDirent. The
- // open events are implemented at the syscall layer so we need
- // to manually queue one here.
- targetDirent.InotifyEvent(linux.IN_OPEN, 0)
+ // automatically queued when the dirent is found. The open
+ // events are implemented at the syscall layer so we need to
+ // manually queue one here.
+ found.InotifyEvent(linux.IN_OPEN, 0)
return nil
})
@@ -404,7 +467,7 @@ func Open(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC
// Openat implements linux syscall openat(2).
func Openat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- dirFD := kdefs.FD(args[0].Int())
+ dirFD := args[0].Int()
addr := args[1].Pointer()
flags := uint(args[2].Uint())
if flags&linux.O_CREAT != 0 {
@@ -443,7 +506,7 @@ func (ac accessContext) Value(key interface{}) interface{} {
}
}
-func accessAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, resolve bool, mode uint) error {
+func accessAt(t *kernel.Task, dirFD int32, addr usermem.Addr, resolve bool, mode uint) error {
const rOK = 4
const wOK = 2
const xOK = 1
@@ -458,7 +521,7 @@ func accessAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, resolve bool, m
return syserror.EINVAL
}
- return fileOpOn(t, dirFD, path, resolve, func(root *fs.Dirent, d *fs.Dirent) error {
+ return fileOpOn(t, dirFD, path, resolve, func(root *fs.Dirent, d *fs.Dirent, _ uint) error {
// access(2) and faccessat(2) check permissions using real
// UID/GID, not effective UID/GID.
//
@@ -498,7 +561,7 @@ func Access(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
// Faccessat implements linux syscall faccessat(2).
func Faccessat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- dirFD := kdefs.FD(args[0].Int())
+ dirFD := args[0].Int()
addr := args[1].Pointer()
mode := args[2].ModeT()
flags := args[3].Int()
@@ -508,10 +571,10 @@ func Faccessat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
// Ioctl implements linux syscall ioctl(2).
func Ioctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
request := int(args[1].Int())
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
@@ -520,12 +583,12 @@ func Ioctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
// Shared flags between file and socket.
switch request {
case linux.FIONCLEX:
- t.FDMap().SetFlags(fd, kernel.FDFlags{
+ t.FDTable().SetFlags(fd, kernel.FDFlags{
CloseOnExec: false,
})
return 0, nil, nil
case linux.FIOCLEX:
- t.FDMap().SetFlags(fd, kernel.FDFlags{
+ t.FDTable().SetFlags(fd, kernel.FDFlags{
CloseOnExec: true,
})
return 0, nil, nil
@@ -572,7 +635,7 @@ func Ioctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
return 0, nil, err
default:
- ret, err := file.FileOperations.Ioctl(t, t.MemoryManager(), args)
+ ret, err := file.FileOperations.Ioctl(t, file, t.MemoryManager(), args)
if err != nil {
return 0, nil, err
}
@@ -626,7 +689,7 @@ func Chroot(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
return 0, nil, err
}
- return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error {
+ return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error {
// Is it a directory?
if !fs.IsDir(d.Inode.StableAttr) {
return syserror.ENOTDIR
@@ -651,7 +714,7 @@ func Chdir(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
return 0, nil, err
}
- return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error {
+ return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error {
// Is it a directory?
if !fs.IsDir(d.Inode.StableAttr) {
return syserror.ENOTDIR
@@ -669,9 +732,9 @@ func Chdir(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
// Fchdir implements the linux syscall fchdir(2).
func Fchdir(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
@@ -693,44 +756,47 @@ func Fchdir(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
// Close implements linux syscall close(2).
func Close(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
- file, ok := t.FDMap().Remove(fd)
- if !ok {
+ // Note that Remove provides a reference on the file that we may use to
+ // flush. It is still active until we drop the final reference below
+ // (and other reference-holding operations complete).
+ file := t.FDTable().Remove(fd)
+ if file == nil {
return 0, nil, syserror.EBADF
}
defer file.DecRef()
err := file.Flush(t)
- return 0, nil, handleIOError(t, false /* partial */, err, syscall.EINTR, "close", file)
+ return 0, nil, handleIOError(t, false /* partial */, err, syserror.EINTR, "close", file)
}
// Dup implements linux syscall dup(2).
func Dup(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
defer file.DecRef()
- newfd, err := t.FDMap().NewFDFrom(0, file, kernel.FDFlags{}, t.ThreadGroup().Limits())
+ newFD, err := t.NewFDFrom(0, file, kernel.FDFlags{})
if err != nil {
return 0, nil, syserror.EMFILE
}
- return uintptr(newfd), nil, nil
+ return uintptr(newFD), nil, nil
}
// Dup2 implements linux syscall dup2(2).
func Dup2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- oldfd := kdefs.FD(args[0].Int())
- newfd := kdefs.FD(args[1].Int())
+ oldfd := args[0].Int()
+ newfd := args[1].Int()
// If oldfd is a valid file descriptor, and newfd has the same value as oldfd,
// then dup2() does nothing, and returns newfd.
if oldfd == newfd {
- oldFile := t.FDMap().GetFile(oldfd)
+ oldFile := t.GetFile(oldfd)
if oldFile == nil {
return 0, nil, syserror.EBADF
}
@@ -746,21 +812,21 @@ func Dup2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC
// Dup3 implements linux syscall dup3(2).
func Dup3(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- oldfd := kdefs.FD(args[0].Int())
- newfd := kdefs.FD(args[1].Int())
+ oldfd := args[0].Int()
+ newfd := args[1].Int()
flags := args[2].Uint()
if oldfd == newfd {
return 0, nil, syserror.EINVAL
}
- oldFile := t.FDMap().GetFile(oldfd)
+ oldFile := t.GetFile(oldfd)
if oldFile == nil {
return 0, nil, syserror.EBADF
}
defer oldFile.DecRef()
- err := t.FDMap().NewFDAt(newfd, oldFile, kernel.FDFlags{CloseOnExec: flags&linux.O_CLOEXEC != 0}, t.ThreadGroup().Limits())
+ err := t.NewFDAt(newfd, oldFile, kernel.FDFlags{CloseOnExec: flags&linux.O_CLOEXEC != 0})
if err != nil {
return 0, nil, err
}
@@ -803,10 +869,10 @@ func fSetOwn(t *kernel.Task, file *fs.File, who int32) {
// Fcntl implements linux syscall fcntl(2).
func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
cmd := args[1].Int()
- file, flags := t.FDMap().GetDescriptor(fd)
+ file, flags := t.FDTable().Get(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
@@ -814,9 +880,10 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
switch cmd {
case linux.F_DUPFD, linux.F_DUPFD_CLOEXEC:
- from := kdefs.FD(args[2].Int())
- fdFlags := kernel.FDFlags{CloseOnExec: cmd == linux.F_DUPFD_CLOEXEC}
- fd, err := t.FDMap().NewFDFrom(from, file, fdFlags, t.ThreadGroup().Limits())
+ from := args[2].Int()
+ fd, err := t.NewFDFrom(from, file, kernel.FDFlags{
+ CloseOnExec: cmd == linux.F_DUPFD_CLOEXEC,
+ })
if err != nil {
return 0, nil, err
}
@@ -825,7 +892,7 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
return uintptr(flags.ToLinuxFDFlags()), nil, nil
case linux.F_SETFD:
flags := args[2].Uint()
- t.FDMap().SetFlags(fd, kernel.FDFlags{
+ t.FDTable().SetFlags(fd, kernel.FDFlags{
CloseOnExec: flags&linux.FD_CLOEXEC != 0,
})
case linux.F_GETFL:
@@ -843,7 +910,7 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
// Copy in the lock request.
flockAddr := args[2].Pointer()
- var flock syscall.Flock_t
+ var flock linux.Flock
if _, err := t.CopyIn(flockAddr, &flock); err != nil {
return 0, nil, err
}
@@ -886,17 +953,17 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
return 0, nil, err
}
- // The lock uid is that of the Task's FDMap.
- lockUniqueID := lock.UniqueID(t.FDMap().ID())
+ // The lock uid is that of the Task's FDTable.
+ lockUniqueID := lock.UniqueID(t.FDTable().ID())
// These locks don't block; execute the non-blocking operation using the inode's lock
// context directly.
switch flock.Type {
- case syscall.F_RDLCK:
+ case linux.F_RDLCK:
if !file.Flags().Read {
return 0, nil, syserror.EBADF
}
- if cmd == syscall.F_SETLK {
+ if cmd == linux.F_SETLK {
// Non-blocking lock, provide a nil lock.Blocker.
if !file.Dirent.Inode.LockCtx.Posix.LockRegion(lockUniqueID, lock.ReadLock, rng, nil) {
return 0, nil, syserror.EAGAIN
@@ -908,11 +975,11 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
}
}
return 0, nil, nil
- case syscall.F_WRLCK:
+ case linux.F_WRLCK:
if !file.Flags().Write {
return 0, nil, syserror.EBADF
}
- if cmd == syscall.F_SETLK {
+ if cmd == linux.F_SETLK {
// Non-blocking lock, provide a nil lock.Blocker.
if !file.Dirent.Inode.LockCtx.Posix.LockRegion(lockUniqueID, lock.WriteLock, rng, nil) {
return 0, nil, syserror.EAGAIN
@@ -924,7 +991,7 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
}
}
return 0, nil, nil
- case syscall.F_UNLCK:
+ case linux.F_UNLCK:
file.Dirent.Inode.LockCtx.Posix.UnlockRegion(lockUniqueID, rng)
return 0, nil, nil
default:
@@ -945,17 +1012,18 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
err := tmpfs.AddSeals(file.Dirent.Inode, args[2].Uint())
return 0, nil, err
case linux.F_GETPIPE_SZ:
- sz, ok := file.FileOperations.(pipe.Sizer)
+ sz, ok := file.FileOperations.(fs.FifoSizer)
if !ok {
return 0, nil, syserror.EINVAL
}
- return uintptr(sz.PipeSize()), nil, nil
+ size, err := sz.FifoSize(t, file)
+ return uintptr(size), nil, err
case linux.F_SETPIPE_SZ:
- sz, ok := file.FileOperations.(pipe.Sizer)
+ sz, ok := file.FileOperations.(fs.FifoSizer)
if !ok {
return 0, nil, syserror.EINVAL
}
- n, err := sz.SetPipeSize(int64(args[2].Int()))
+ n, err := sz.SetFifoSize(int64(args[2].Int()))
return uintptr(n), nil, err
default:
// Everything else is not yet supported.
@@ -976,7 +1044,7 @@ const (
// Fadvise64 implements linux syscall fadvise64(2).
// This implementation currently ignores the provided advice.
func Fadvise64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
length := args[2].Int64()
advice := args[3].Int()
@@ -985,7 +1053,7 @@ func Fadvise64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
return 0, nil, syserror.EINVAL
}
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
@@ -1011,13 +1079,13 @@ func Fadvise64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
return 0, nil, nil
}
-func mkdirAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, mode linux.FileMode) error {
+func mkdirAt(t *kernel.Task, dirFD int32, addr usermem.Addr, mode linux.FileMode) error {
path, _, err := copyInPath(t, addr, false /* allowEmpty */)
if err != nil {
return err
}
- return fileOpAt(t, dirFD, path, func(root *fs.Dirent, d *fs.Dirent, name string) error {
+ return fileOpAt(t, dirFD, path, func(root *fs.Dirent, d *fs.Dirent, name string, _ uint) error {
if !fs.IsDir(d.Inode.StableAttr) {
return syserror.ENOTDIR
}
@@ -1056,14 +1124,14 @@ func Mkdir(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
// Mkdirat implements linux syscall mkdirat(2).
func Mkdirat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- dirFD := kdefs.FD(args[0].Int())
+ dirFD := args[0].Int()
addr := args[1].Pointer()
mode := linux.FileMode(args[2].ModeT())
return 0, nil, mkdirAt(t, dirFD, addr, mode)
}
-func rmdirAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr) error {
+func rmdirAt(t *kernel.Task, dirFD int32, addr usermem.Addr) error {
path, _, err := copyInPath(t, addr, false /* allowEmpty */)
if err != nil {
return err
@@ -1074,7 +1142,7 @@ func rmdirAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr) error {
return syserror.EBUSY
}
- return fileOpAt(t, dirFD, path, func(root *fs.Dirent, d *fs.Dirent, name string) error {
+ return fileOpAt(t, dirFD, path, func(root *fs.Dirent, d *fs.Dirent, name string, _ uint) error {
if !fs.IsDir(d.Inode.StableAttr) {
return syserror.ENOTDIR
}
@@ -1103,7 +1171,7 @@ func Rmdir(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
return 0, nil, rmdirAt(t, linux.AT_FDCWD, addr)
}
-func symlinkAt(t *kernel.Task, dirFD kdefs.FD, newAddr usermem.Addr, oldAddr usermem.Addr) error {
+func symlinkAt(t *kernel.Task, dirFD int32, newAddr usermem.Addr, oldAddr usermem.Addr) error {
newPath, dirPath, err := copyInPath(t, newAddr, false /* allowEmpty */)
if err != nil {
return err
@@ -1122,7 +1190,7 @@ func symlinkAt(t *kernel.Task, dirFD kdefs.FD, newAddr usermem.Addr, oldAddr use
return syserror.ENOENT
}
- return fileOpAt(t, dirFD, newPath, func(root *fs.Dirent, d *fs.Dirent, name string) error {
+ return fileOpAt(t, dirFD, newPath, func(root *fs.Dirent, d *fs.Dirent, name string, _ uint) error {
if !fs.IsDir(d.Inode.StableAttr) {
return syserror.ENOTDIR
}
@@ -1146,7 +1214,7 @@ func Symlink(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
// Symlinkat implements linux syscall symlinkat(2).
func Symlinkat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
oldAddr := args[0].Pointer()
- dirFD := kdefs.FD(args[1].Int())
+ dirFD := args[1].Int()
newAddr := args[2].Pointer()
return 0, nil, symlinkAt(t, dirFD, newAddr, oldAddr)
@@ -1188,7 +1256,7 @@ func mayLinkAt(t *kernel.Task, target *fs.Inode) error {
// linkAt creates a hard link to the target specified by oldDirFD and oldAddr,
// specified by newDirFD and newAddr. If resolve is true, then the symlinks
// will be followed when evaluating the target.
-func linkAt(t *kernel.Task, oldDirFD kdefs.FD, oldAddr usermem.Addr, newDirFD kdefs.FD, newAddr usermem.Addr, resolve, allowEmpty bool) error {
+func linkAt(t *kernel.Task, oldDirFD int32, oldAddr usermem.Addr, newDirFD int32, newAddr usermem.Addr, resolve, allowEmpty bool) error {
oldPath, _, err := copyInPath(t, oldAddr, allowEmpty)
if err != nil {
return err
@@ -1202,7 +1270,7 @@ func linkAt(t *kernel.Task, oldDirFD kdefs.FD, oldAddr usermem.Addr, newDirFD kd
}
if allowEmpty && oldPath == "" {
- target := t.FDMap().GetFile(oldDirFD)
+ target := t.GetFile(oldDirFD)
if target == nil {
return syserror.EBADF
}
@@ -1212,7 +1280,7 @@ func linkAt(t *kernel.Task, oldDirFD kdefs.FD, oldAddr usermem.Addr, newDirFD kd
}
// Resolve the target directory.
- return fileOpAt(t, newDirFD, newPath, func(root *fs.Dirent, newParent *fs.Dirent, newName string) error {
+ return fileOpAt(t, newDirFD, newPath, func(root *fs.Dirent, newParent *fs.Dirent, newName string, _ uint) error {
if !fs.IsDir(newParent.Inode.StableAttr) {
return syserror.ENOTDIR
}
@@ -1227,13 +1295,13 @@ func linkAt(t *kernel.Task, oldDirFD kdefs.FD, oldAddr usermem.Addr, newDirFD kd
// Resolve oldDirFD and oldAddr to a dirent. The "resolve" argument
// only applies to this name.
- return fileOpOn(t, oldDirFD, oldPath, resolve, func(root *fs.Dirent, target *fs.Dirent) error {
+ return fileOpOn(t, oldDirFD, oldPath, resolve, func(root *fs.Dirent, target *fs.Dirent, _ uint) error {
if err := mayLinkAt(t, target.Inode); err != nil {
return err
}
// Next resolve newDirFD and newAddr to the parent dirent and name.
- return fileOpAt(t, newDirFD, newPath, func(root *fs.Dirent, newParent *fs.Dirent, newName string) error {
+ return fileOpAt(t, newDirFD, newPath, func(root *fs.Dirent, newParent *fs.Dirent, newName string, _ uint) error {
if !fs.IsDir(newParent.Inode.StableAttr) {
return syserror.ENOTDIR
}
@@ -1264,9 +1332,9 @@ func Link(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC
// Linkat implements linux syscall linkat(2).
func Linkat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- oldDirFD := kdefs.FD(args[0].Int())
+ oldDirFD := args[0].Int()
oldAddr := args[1].Pointer()
- newDirFD := kdefs.FD(args[2].Int())
+ newDirFD := args[2].Int()
newAddr := args[3].Pointer()
// man linkat(2):
@@ -1291,7 +1359,7 @@ func Linkat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
return 0, nil, linkAt(t, oldDirFD, oldAddr, newDirFD, newAddr, resolve, allowEmpty)
}
-func readlinkAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, bufAddr usermem.Addr, size uint) (copied uintptr, err error) {
+func readlinkAt(t *kernel.Task, dirFD int32, addr usermem.Addr, bufAddr usermem.Addr, size uint) (copied uintptr, err error) {
path, dirPath, err := copyInPath(t, addr, false /* allowEmpty */)
if err != nil {
return 0, err
@@ -1300,7 +1368,7 @@ func readlinkAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, bufAddr userm
return 0, syserror.ENOENT
}
- err = fileOpOn(t, dirFD, path, false /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error {
+ err = fileOpOn(t, dirFD, path, false /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error {
// Check for Read permission.
if err := d.Inode.CheckPermission(t, fs.PermMask{Read: true}); err != nil {
return err
@@ -1341,7 +1409,7 @@ func Readlink(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
// Readlinkat implements linux syscall readlinkat(2).
func Readlinkat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- dirFD := kdefs.FD(args[0].Int())
+ dirFD := args[0].Int()
addr := args[1].Pointer()
bufAddr := args[2].Pointer()
size := args[3].SizeT()
@@ -1350,7 +1418,7 @@ func Readlinkat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy
return n, nil, err
}
-func unlinkAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr) error {
+func unlinkAt(t *kernel.Task, dirFD int32, addr usermem.Addr) error {
path, dirPath, err := copyInPath(t, addr, false /* allowEmpty */)
if err != nil {
return err
@@ -1359,7 +1427,7 @@ func unlinkAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr) error {
return syserror.ENOENT
}
- return fileOpAt(t, dirFD, path, func(root *fs.Dirent, d *fs.Dirent, name string) error {
+ return fileOpAt(t, dirFD, path, func(root *fs.Dirent, d *fs.Dirent, name string, _ uint) error {
if !fs.IsDir(d.Inode.StableAttr) {
return syserror.ENOTDIR
}
@@ -1380,7 +1448,7 @@ func Unlink(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
// Unlinkat implements linux syscall unlinkat(2).
func Unlinkat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- dirFD := kdefs.FD(args[0].Int())
+ dirFD := args[0].Int()
addr := args[1].Pointer()
flags := args[2].Uint()
if flags&linux.AT_REMOVEDIR != 0 {
@@ -1408,13 +1476,13 @@ func Truncate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
if uint64(length) >= t.ThreadGroup().Limits().Get(limits.FileSize).Cur {
t.SendSignal(&arch.SignalInfo{
- Signo: int32(syscall.SIGXFSZ),
+ Signo: int32(linux.SIGXFSZ),
Code: arch.SignalInfoUser,
})
return 0, nil, syserror.EFBIG
}
- return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error {
+ return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error {
if fs.IsDir(d.Inode.StableAttr) {
return syserror.EISDIR
}
@@ -1441,10 +1509,10 @@ func Truncate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
// Ftruncate implements linux syscall ftruncate(2).
func Ftruncate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
length := args[1].Int64()
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
@@ -1468,7 +1536,7 @@ func Ftruncate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
if uint64(length) >= t.ThreadGroup().Limits().Get(limits.FileSize).Cur {
t.SendSignal(&arch.SignalInfo{
- Signo: int32(syscall.SIGXFSZ),
+ Signo: int32(linux.SIGXFSZ),
Code: arch.SignalInfoUser,
})
return 0, nil, syserror.EFBIG
@@ -1559,7 +1627,7 @@ func chown(t *kernel.Task, d *fs.Dirent, uid auth.UID, gid auth.GID) error {
return nil
}
-func chownAt(t *kernel.Task, fd kdefs.FD, addr usermem.Addr, resolve, allowEmpty bool, uid auth.UID, gid auth.GID) error {
+func chownAt(t *kernel.Task, fd int32, addr usermem.Addr, resolve, allowEmpty bool, uid auth.UID, gid auth.GID) error {
path, _, err := copyInPath(t, addr, allowEmpty)
if err != nil {
return err
@@ -1567,7 +1635,7 @@ func chownAt(t *kernel.Task, fd kdefs.FD, addr usermem.Addr, resolve, allowEmpty
if path == "" {
// Annoying. What's wrong with fchown?
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
return syserror.EBADF
}
@@ -1576,7 +1644,7 @@ func chownAt(t *kernel.Task, fd kdefs.FD, addr usermem.Addr, resolve, allowEmpty
return chown(t, file.Dirent, uid, gid)
}
- return fileOpOn(t, fd, path, resolve, func(root *fs.Dirent, d *fs.Dirent) error {
+ return fileOpOn(t, fd, path, resolve, func(root *fs.Dirent, d *fs.Dirent, _ uint) error {
return chown(t, d, uid, gid)
})
}
@@ -1601,11 +1669,11 @@ func Lchown(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
// Fchown implements linux syscall fchown(2).
func Fchown(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
uid := auth.UID(args[1].Uint())
gid := auth.GID(args[2].Uint())
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
@@ -1616,7 +1684,7 @@ func Fchown(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
// Fchownat implements Linux syscall fchownat(2).
func Fchownat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- dirFD := kdefs.FD(args[0].Int())
+ dirFD := args[0].Int()
addr := args[1].Pointer()
uid := auth.UID(args[2].Uint())
gid := auth.GID(args[3].Uint())
@@ -1646,13 +1714,13 @@ func chmod(t *kernel.Task, d *fs.Dirent, mode linux.FileMode) error {
return nil
}
-func chmodAt(t *kernel.Task, fd kdefs.FD, addr usermem.Addr, mode linux.FileMode) error {
+func chmodAt(t *kernel.Task, fd int32, addr usermem.Addr, mode linux.FileMode) error {
path, _, err := copyInPath(t, addr, false /* allowEmpty */)
if err != nil {
return err
}
- return fileOpOn(t, fd, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error {
+ return fileOpOn(t, fd, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error {
return chmod(t, d, mode)
})
}
@@ -1667,10 +1735,10 @@ func Chmod(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
// Fchmod implements linux syscall fchmod(2).
func Fchmod(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
mode := linux.FileMode(args[1].ModeT())
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
@@ -1681,7 +1749,7 @@ func Fchmod(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
// Fchmodat implements linux syscall fchmodat(2).
func Fchmodat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
addr := args[1].Pointer()
mode := linux.FileMode(args[2].ModeT())
@@ -1697,8 +1765,8 @@ func defaultSetToSystemTimeSpec() fs.TimeSpec {
}
}
-func utimes(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, ts fs.TimeSpec, resolve bool) error {
- setTimestamp := func(root *fs.Dirent, d *fs.Dirent) error {
+func utimes(t *kernel.Task, dirFD int32, addr usermem.Addr, ts fs.TimeSpec, resolve bool) error {
+ setTimestamp := func(root *fs.Dirent, d *fs.Dirent, _ uint) error {
// Does the task own the file?
if !d.Inode.CheckOwnership(t) {
// Trying to set a specific time? Must be owner.
@@ -1730,7 +1798,7 @@ func utimes(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, ts fs.TimeSpec, r
// Linux returns EINVAL in this case. See utimes.c.
return syserror.EINVAL
}
- f := t.FDMap().GetFile(dirFD)
+ f := t.GetFile(dirFD)
if f == nil {
return syserror.EBADF
}
@@ -1739,7 +1807,7 @@ func utimes(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, ts fs.TimeSpec, r
root := t.FSContext().RootDirectory()
defer root.DecRef()
- return setTimestamp(root, f.Dirent)
+ return setTimestamp(root, f.Dirent, linux.MaxSymlinkTraversals)
}
path, _, err := copyInPath(t, addr, false /* allowEmpty */)
@@ -1758,7 +1826,7 @@ func Utime(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
// No timesAddr argument will be interpreted as current system time.
ts := defaultSetToSystemTimeSpec()
if timesAddr != 0 {
- var times syscall.Utimbuf
+ var times linux.Utime
if _, err := t.CopyIn(timesAddr, &times); err != nil {
return 0, nil, err
}
@@ -1798,7 +1866,7 @@ func timespecIsValid(ts linux.Timespec) bool {
// Utimensat implements linux syscall utimensat(2).
func Utimensat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- dirFD := kdefs.FD(args[0].Int())
+ dirFD := args[0].Int()
pathnameAddr := args[1].Pointer()
timesAddr := args[2].Pointer()
flags := args[3].Int()
@@ -1833,7 +1901,7 @@ func Utimensat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
// Futimesat implements linux syscall futimesat(2).
func Futimesat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- dirFD := kdefs.FD(args[0].Int())
+ dirFD := args[0].Int()
pathnameAddr := args[1].Pointer()
timesAddr := args[2].Pointer()
@@ -1857,7 +1925,7 @@ func Futimesat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
return 0, nil, utimes(t, dirFD, pathnameAddr, ts, true)
}
-func renameAt(t *kernel.Task, oldDirFD kdefs.FD, oldAddr usermem.Addr, newDirFD kdefs.FD, newAddr usermem.Addr) error {
+func renameAt(t *kernel.Task, oldDirFD int32, oldAddr usermem.Addr, newDirFD int32, newAddr usermem.Addr) error {
newPath, _, err := copyInPath(t, newAddr, false /* allowEmpty */)
if err != nil {
return err
@@ -1867,7 +1935,7 @@ func renameAt(t *kernel.Task, oldDirFD kdefs.FD, oldAddr usermem.Addr, newDirFD
return err
}
- return fileOpAt(t, oldDirFD, oldPath, func(root *fs.Dirent, oldParent *fs.Dirent, oldName string) error {
+ return fileOpAt(t, oldDirFD, oldPath, func(root *fs.Dirent, oldParent *fs.Dirent, oldName string, _ uint) error {
if !fs.IsDir(oldParent.Inode.StableAttr) {
return syserror.ENOTDIR
}
@@ -1879,7 +1947,7 @@ func renameAt(t *kernel.Task, oldDirFD kdefs.FD, oldAddr usermem.Addr, newDirFD
return syserror.EBUSY
}
- return fileOpAt(t, newDirFD, newPath, func(root *fs.Dirent, newParent *fs.Dirent, newName string) error {
+ return fileOpAt(t, newDirFD, newPath, func(root *fs.Dirent, newParent *fs.Dirent, newName string, _ uint) error {
if !fs.IsDir(newParent.Inode.StableAttr) {
return syserror.ENOTDIR
}
@@ -1905,21 +1973,21 @@ func Rename(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
// Renameat implements linux syscall renameat(2).
func Renameat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- oldDirFD := kdefs.FD(args[0].Int())
+ oldDirFD := args[0].Int()
oldPathAddr := args[1].Pointer()
- newDirFD := kdefs.FD(args[2].Int())
+ newDirFD := args[2].Int()
newPathAddr := args[3].Pointer()
return 0, nil, renameAt(t, oldDirFD, oldPathAddr, newDirFD, newPathAddr)
}
// Fallocate implements linux system call fallocate(2).
func Fallocate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
mode := args[1].Int64()
offset := args[2].Int64()
length := args[3].Int64()
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
@@ -1950,7 +2018,7 @@ func Fallocate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
}
if uint64(size) >= t.ThreadGroup().Limits().Get(limits.FileSize).Cur {
t.SendSignal(&arch.SignalInfo{
- Signo: int32(syscall.SIGXFSZ),
+ Signo: int32(linux.SIGXFSZ),
Code: arch.SignalInfoUser,
})
return 0, nil, syserror.EFBIG
@@ -1968,10 +2036,10 @@ func Fallocate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
// Flock implements linux syscall flock(2).
func Flock(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
operation := args[1].Int()
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
// flock(2): EBADF fd is not an open file descriptor.
return 0, nil, syserror.EBADF
@@ -2067,7 +2135,7 @@ func MemfdCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S
name = memfdPrefix + name
inode := tmpfs.NewMemfdInode(t, allowSeals)
- dirent := fs.NewDirent(inode, name)
+ dirent := fs.NewDirent(t, inode, name)
// Per Linux, mm/shmem.c:__shmem_file_setup(), memfd files are set up with
// FMODE_READ | FMODE_WRITE.
file, err := inode.GetFile(t, dirent, fs.FileFlags{Read: true, Write: true})
@@ -2078,8 +2146,9 @@ func MemfdCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S
defer dirent.DecRef()
defer file.DecRef()
- fdFlags := kernel.FDFlags{CloseOnExec: cloExec}
- newFD, err := t.FDMap().NewFDFrom(0, file, fdFlags, t.ThreadGroup().Limits())
+ newFD, err := t.NewFDFrom(0, file, kernel.FDFlags{
+ CloseOnExec: cloExec,
+ })
if err != nil {
return 0, nil, err
}
diff --git a/pkg/sentry/syscalls/linux/sys_futex.go b/pkg/sentry/syscalls/linux/sys_futex.go
index 7cef4b50c..b9bd25464 100644
--- a/pkg/sentry/syscalls/linux/sys_futex.go
+++ b/pkg/sentry/syscalls/linux/sys_futex.go
@@ -17,12 +17,12 @@ package linux
import (
"time"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// futexWaitRestartBlock encapsulates the state required to restart futex(2)
diff --git a/pkg/sentry/syscalls/linux/sys_getdents.go b/pkg/sentry/syscalls/linux/sys_getdents.go
index 1b597d5bc..63e2c5a5d 100644
--- a/pkg/sentry/syscalls/linux/sys_getdents.go
+++ b/pkg/sentry/syscalls/linux/sys_getdents.go
@@ -17,20 +17,19 @@ package linux
import (
"bytes"
"io"
- "syscall"
-
- "gvisor.googlesource.com/gvisor/pkg/binary"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// Getdents implements linux syscall getdents(2) for 64bit systems.
func Getdents(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
addr := args[1].Pointer()
size := int(args[2].Uint())
@@ -46,7 +45,7 @@ func Getdents(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
// Getdents64 implements linux syscall getdents64(2).
func Getdents64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
addr := args[1].Pointer()
size := int(args[2].Uint())
@@ -62,8 +61,8 @@ func Getdents64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy
// getdents implements the core of getdents(2)/getdents64(2).
// f is the syscall implementation dirent serialization function.
-func getdents(t *kernel.Task, fd kdefs.FD, addr usermem.Addr, size int, f func(*dirent, io.Writer) (int, error)) (uintptr, error) {
- dir := t.FDMap().GetFile(fd)
+func getdents(t *kernel.Task, fd int32, addr usermem.Addr, size int, f func(*dirent, io.Writer) (int, error)) (uintptr, error) {
+ dir := t.GetFile(fd)
if dir == nil {
return 0, syserror.EBADF
}
@@ -83,7 +82,7 @@ func getdents(t *kernel.Task, fd kdefs.FD, addr usermem.Addr, size int, f func(*
switch err := handleIOError(t, ds.Written() > 0, rerr, kernel.ERESTARTSYS, "getdents", dir); err {
case nil:
- dir.Dirent.InotifyEvent(syscall.IN_ACCESS, 0)
+ dir.Dirent.InotifyEvent(linux.IN_ACCESS, 0)
return uintptr(ds.Written()), nil
case io.EOF:
return 0, nil
@@ -147,21 +146,21 @@ func smallestDirent64(a arch.Context) uint {
func toType(nodeType fs.InodeType) uint8 {
switch nodeType {
case fs.RegularFile, fs.SpecialFile:
- return syscall.DT_REG
+ return linux.DT_REG
case fs.Symlink:
- return syscall.DT_LNK
+ return linux.DT_LNK
case fs.Directory, fs.SpecialDirectory:
- return syscall.DT_DIR
+ return linux.DT_DIR
case fs.Pipe:
- return syscall.DT_FIFO
+ return linux.DT_FIFO
case fs.CharacterDevice:
- return syscall.DT_CHR
+ return linux.DT_CHR
case fs.BlockDevice:
- return syscall.DT_BLK
+ return linux.DT_BLK
case fs.Socket:
- return syscall.DT_SOCK
+ return linux.DT_SOCK
default:
- return syscall.DT_UNKNOWN
+ return linux.DT_UNKNOWN
}
}
diff --git a/pkg/sentry/syscalls/linux/sys_identity.go b/pkg/sentry/syscalls/linux/sys_identity.go
index 27e765a2d..715ac45e6 100644
--- a/pkg/sentry/syscalls/linux/sys_identity.go
+++ b/pkg/sentry/syscalls/linux/sys_identity.go
@@ -15,10 +15,10 @@
package linux
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/syserror"
)
const (
diff --git a/pkg/sentry/syscalls/linux/sys_inotify.go b/pkg/sentry/syscalls/linux/sys_inotify.go
index 20269a769..b2c7b3444 100644
--- a/pkg/sentry/syscalls/linux/sys_inotify.go
+++ b/pkg/sentry/syscalls/linux/sys_inotify.go
@@ -15,14 +15,12 @@
package linux
import (
- "syscall"
-
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/anon"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/anon"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/syserror"
)
const allFlags = int(linux.IN_NONBLOCK | linux.IN_CLOEXEC)
@@ -32,10 +30,10 @@ func InotifyInit1(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.
flags := int(args[0].Int())
if flags&^allFlags != 0 {
- return 0, nil, syscall.EINVAL
+ return 0, nil, syserror.EINVAL
}
- dirent := fs.NewDirent(anon.NewInode(t), "inotify")
+ dirent := fs.NewDirent(t, anon.NewInode(t), "inotify")
fileFlags := fs.FileFlags{
Read: true,
Write: true,
@@ -44,9 +42,9 @@ func InotifyInit1(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.
n := fs.NewFile(t, dirent, fileFlags, fs.NewInotify(t))
defer n.DecRef()
- fd, err := t.FDMap().NewFDFrom(0, n, kernel.FDFlags{
+ fd, err := t.NewFDFrom(0, n, kernel.FDFlags{
CloseOnExec: flags&linux.IN_CLOEXEC != 0,
- }, t.ThreadGroup().Limits())
+ })
if err != nil {
return 0, nil, err
@@ -63,18 +61,18 @@ func InotifyInit(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S
// fdToInotify resolves an fd to an inotify object. If successful, the file will
// have an extra ref and the caller is responsible for releasing the ref.
-func fdToInotify(t *kernel.Task, fd kdefs.FD) (*fs.Inotify, *fs.File, error) {
- file := t.FDMap().GetFile(fd)
+func fdToInotify(t *kernel.Task, fd int32) (*fs.Inotify, *fs.File, error) {
+ file := t.GetFile(fd)
if file == nil {
// Invalid fd.
- return nil, nil, syscall.EBADF
+ return nil, nil, syserror.EBADF
}
ino, ok := file.FileOperations.(*fs.Inotify)
if !ok {
// Not an inotify fd.
file.DecRef()
- return nil, nil, syscall.EINVAL
+ return nil, nil, syserror.EINVAL
}
return ino, file, nil
@@ -82,7 +80,7 @@ func fdToInotify(t *kernel.Task, fd kdefs.FD) (*fs.Inotify, *fs.File, error) {
// InotifyAddWatch implements the inotify_add_watch() syscall.
func InotifyAddWatch(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
addr := args[1].Pointer()
mask := args[2].Uint()
@@ -93,7 +91,7 @@ func InotifyAddWatch(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kern
// "EINVAL: The given event mask contains no valid events."
// -- inotify_add_watch(2)
if validBits := mask & linux.ALL_INOTIFY_BITS; validBits == 0 {
- return 0, nil, syscall.EINVAL
+ return 0, nil, syserror.EINVAL
}
ino, file, err := fdToInotify(t, fd)
@@ -107,14 +105,14 @@ func InotifyAddWatch(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kern
return 0, nil, err
}
- err = fileOpOn(t, linux.AT_FDCWD, path, resolve, func(root *fs.Dirent, dirent *fs.Dirent) error {
+ err = fileOpOn(t, linux.AT_FDCWD, path, resolve, func(root *fs.Dirent, dirent *fs.Dirent, _ uint) error {
// "IN_ONLYDIR: Only watch pathname if it is a directory." -- inotify(7)
if onlyDir := mask&linux.IN_ONLYDIR != 0; onlyDir && !fs.IsDir(dirent.Inode.StableAttr) {
- return syscall.ENOTDIR
+ return syserror.ENOTDIR
}
// Copy out to the return frame.
- fd = kdefs.FD(ino.AddWatch(dirent, mask))
+ fd = ino.AddWatch(dirent, mask)
return nil
})
@@ -123,7 +121,7 @@ func InotifyAddWatch(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kern
// InotifyRmWatch implements the inotify_rm_watch() syscall.
func InotifyRmWatch(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
wd := args[1].Int()
ino, file, err := fdToInotify(t, fd)
diff --git a/pkg/sentry/syscalls/linux/sys_lseek.go b/pkg/sentry/syscalls/linux/sys_lseek.go
index 8aadc6d8c..297e920c4 100644
--- a/pkg/sentry/syscalls/linux/sys_lseek.go
+++ b/pkg/sentry/syscalls/linux/sys_lseek.go
@@ -15,20 +15,19 @@
package linux
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// Lseek implements linux syscall lseek(2).
func Lseek(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
offset := args[1].Int64()
whence := args[2].Int()
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
diff --git a/pkg/sentry/syscalls/linux/sys_mempolicy.go b/pkg/sentry/syscalls/linux/sys_mempolicy.go
index 652b2c206..f5a519d8a 100644
--- a/pkg/sentry/syscalls/linux/sys_mempolicy.go
+++ b/pkg/sentry/syscalls/linux/sys_mempolicy.go
@@ -17,11 +17,11 @@ package linux
import (
"fmt"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// We unconditionally report a single NUMA node. This also means that our
diff --git a/pkg/sentry/syscalls/linux/sys_mmap.go b/pkg/sentry/syscalls/linux/sys_mmap.go
index 9926f0ac5..58a05b5bb 100644
--- a/pkg/sentry/syscalls/linux/sys_mmap.go
+++ b/pkg/sentry/syscalls/linux/sys_mmap.go
@@ -17,14 +17,13 @@ package linux
import (
"bytes"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
- "gvisor.googlesource.com/gvisor/pkg/sentry/mm"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/mm"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// Brk implements linux syscall brk(2).
@@ -40,7 +39,7 @@ func Brk(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallCo
func Mmap(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
prot := args[2].Int()
flags := args[3].Int()
- fd := kdefs.FD(args[4].Int())
+ fd := args[4].Int()
fixed := flags&linux.MAP_FIXED != 0
private := flags&linux.MAP_PRIVATE != 0
shared := flags&linux.MAP_SHARED != 0
@@ -80,7 +79,7 @@ func Mmap(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC
if !anon {
// Convert the passed FD to a file reference.
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
@@ -180,6 +179,10 @@ func Madvise(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
switch adv {
case linux.MADV_DONTNEED:
return 0, nil, t.MemoryManager().Decommit(addr, length)
+ case linux.MADV_DOFORK:
+ return 0, nil, t.MemoryManager().SetDontFork(addr, length, false)
+ case linux.MADV_DONTFORK:
+ return 0, nil, t.MemoryManager().SetDontFork(addr, length, true)
case linux.MADV_HUGEPAGE, linux.MADV_NOHUGEPAGE:
fallthrough
case linux.MADV_MERGEABLE, linux.MADV_UNMERGEABLE:
@@ -191,7 +194,7 @@ func Madvise(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
case linux.MADV_NORMAL, linux.MADV_RANDOM, linux.MADV_SEQUENTIAL, linux.MADV_WILLNEED:
// Do nothing, we totally ignore the suggestions above.
return 0, nil, nil
- case linux.MADV_REMOVE, linux.MADV_DOFORK, linux.MADV_DONTFORK:
+ case linux.MADV_REMOVE:
// These "suggestions" have application-visible side effects, so we
// have to indicate that we don't support them.
return 0, nil, syserror.ENOSYS
diff --git a/pkg/sentry/syscalls/linux/sys_mount.go b/pkg/sentry/syscalls/linux/sys_mount.go
index cf613bad0..9080a10c3 100644
--- a/pkg/sentry/syscalls/linux/sys_mount.go
+++ b/pkg/sentry/syscalls/linux/sys_mount.go
@@ -15,12 +15,12 @@
package linux
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// Mount implements Linux syscall mount(2).
@@ -109,7 +109,7 @@ func Mount(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
return 0, nil, syserror.EINVAL
}
- return 0, nil, fileOpOn(t, linux.AT_FDCWD, targetPath, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error {
+ return 0, nil, fileOpOn(t, linux.AT_FDCWD, targetPath, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error {
return t.MountNamespace().Mount(t, d, rootInode)
})
}
@@ -140,7 +140,7 @@ func Umount2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
resolve := flags&linux.UMOUNT_NOFOLLOW != linux.UMOUNT_NOFOLLOW
detachOnly := flags&linux.MNT_DETACH == linux.MNT_DETACH
- return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, resolve, func(root *fs.Dirent, d *fs.Dirent) error {
+ return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, resolve, func(root *fs.Dirent, d *fs.Dirent, _ uint) error {
return t.MountNamespace().Unmount(t, d, detachOnly)
})
}
diff --git a/pkg/sentry/syscalls/linux/sys_pipe.go b/pkg/sentry/syscalls/linux/sys_pipe.go
index 036845c13..418d7fa5f 100644
--- a/pkg/sentry/syscalls/linux/sys_pipe.go
+++ b/pkg/sentry/syscalls/linux/sys_pipe.go
@@ -15,20 +15,19 @@
package linux
import (
- "syscall"
-
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/pipe"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/pipe"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// pipe2 implements the actual system call with flags.
func pipe2(t *kernel.Task, addr usermem.Addr, flags uint) (uintptr, error) {
if flags&^(linux.O_NONBLOCK|linux.O_CLOEXEC) != 0 {
- return 0, syscall.EINVAL
+ return 0, syserror.EINVAL
}
r, w := pipe.NewConnectedPipe(t, pipe.DefaultPipeSize, usermem.PageSize)
@@ -38,25 +37,18 @@ func pipe2(t *kernel.Task, addr usermem.Addr, flags uint) (uintptr, error) {
w.SetFlags(linuxToFlags(flags).Settable())
defer w.DecRef()
- rfd, err := t.FDMap().NewFDFrom(0, r, kernel.FDFlags{
- CloseOnExec: flags&linux.O_CLOEXEC != 0},
- t.ThreadGroup().Limits())
- if err != nil {
- return 0, err
- }
-
- wfd, err := t.FDMap().NewFDFrom(0, w, kernel.FDFlags{
- CloseOnExec: flags&linux.O_CLOEXEC != 0},
- t.ThreadGroup().Limits())
+ fds, err := t.NewFDs(0, []*fs.File{r, w}, kernel.FDFlags{
+ CloseOnExec: flags&linux.O_CLOEXEC != 0,
+ })
if err != nil {
- t.FDMap().Remove(rfd)
return 0, err
}
- if _, err := t.CopyOut(addr, []kdefs.FD{rfd, wfd}); err != nil {
- t.FDMap().Remove(rfd)
- t.FDMap().Remove(wfd)
- return 0, syscall.EFAULT
+ if _, err := t.CopyOut(addr, fds); err != nil {
+ // The files are not closed in this case, the exact semantics
+ // of this error case are not well defined, but they could have
+ // already been observed by user space.
+ return 0, syserror.EFAULT
}
return 0, nil
}
diff --git a/pkg/sentry/syscalls/linux/sys_poll.go b/pkg/sentry/syscalls/linux/sys_poll.go
index e32099dd4..7a13beac2 100644
--- a/pkg/sentry/syscalls/linux/sys_poll.go
+++ b/pkg/sentry/syscalls/linux/sys_poll.go
@@ -17,16 +17,15 @@ package linux
import (
"time"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/limits"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/limits"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// fileCap is the maximum allowable files for poll & select.
@@ -64,7 +63,7 @@ func initReadiness(t *kernel.Task, pfd *linux.PollFD, state *pollState, ch chan
return
}
- file := t.FDMap().GetFile(kdefs.FD(pfd.FD))
+ file := t.GetFile(pfd.FD)
if file == nil {
pfd.REvents = linux.POLLNVAL
return
@@ -265,7 +264,7 @@ func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs usermem.Add
// immediately to ensure we don't leak. Note, another thread
// might be about to close fd. This is racy, but that's
// OK. Linux is racy in the same way.
- file := t.FDMap().GetFile(kdefs.FD(fd))
+ file := t.GetFile(fd)
if file == nil {
return 0, syserror.EBADF
}
diff --git a/pkg/sentry/syscalls/linux/sys_prctl.go b/pkg/sentry/syscalls/linux/sys_prctl.go
index 1b7e5616b..98db32d77 100644
--- a/pkg/sentry/syscalls/linux/sys_prctl.go
+++ b/pkg/sentry/syscalls/linux/sys_prctl.go
@@ -16,15 +16,14 @@ package linux
import (
"fmt"
- "syscall"
-
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/mm"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/mm"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// Prctl implements linux syscall prctl(2).
@@ -37,7 +36,7 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
case linux.PR_SET_PDEATHSIG:
sig := linux.Signal(args[1].Int())
if sig != 0 && !sig.IsValid() {
- return 0, nil, syscall.EINVAL
+ return 0, nil, syserror.EINVAL
}
t.SetParentDeathSignal(sig)
return 0, nil, nil
@@ -68,7 +67,7 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
d = mm.UserDumpable
default:
// N.B. Userspace may not pass SUID_DUMP_ROOT.
- return 0, nil, syscall.EINVAL
+ return 0, nil, syserror.EINVAL
}
t.MemoryManager().SetDumpability(d)
return 0, nil, nil
@@ -89,7 +88,7 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
} else if val == 1 {
t.SetKeepCaps(true)
} else {
- return 0, nil, syscall.EINVAL
+ return 0, nil, syserror.EINVAL
}
return 0, nil, nil
@@ -97,7 +96,7 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
case linux.PR_SET_NAME:
addr := args[1].Pointer()
name, err := t.CopyInString(addr, linux.TASK_COMM_LEN-1)
- if err != nil && err != syscall.ENAMETOOLONG {
+ if err != nil && err != syserror.ENAMETOOLONG {
return 0, nil, err
}
t.SetName(name)
@@ -117,22 +116,22 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
case linux.PR_SET_MM:
if !t.HasCapability(linux.CAP_SYS_RESOURCE) {
- return 0, nil, syscall.EPERM
+ return 0, nil, syserror.EPERM
}
switch args[1].Int() {
case linux.PR_SET_MM_EXE_FILE:
- fd := kdefs.FD(args[2].Int())
+ fd := args[2].Int()
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
- return 0, nil, syscall.EBADF
+ return 0, nil, syserror.EBADF
}
defer file.DecRef()
// They trying to set exe to a non-file?
if !fs.IsFile(file.Dirent.Inode.StableAttr) {
- return 0, nil, syscall.EBADF
+ return 0, nil, syserror.EBADF
}
// Set the underlying executable.
@@ -154,12 +153,12 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
t.Kernel().EmitUnimplementedEvent(t)
fallthrough
default:
- return 0, nil, syscall.EINVAL
+ return 0, nil, syserror.EINVAL
}
case linux.PR_SET_NO_NEW_PRIVS:
if args[1].Int() != 1 || args[2].Int() != 0 || args[3].Int() != 0 || args[4].Int() != 0 {
- return 0, nil, syscall.EINVAL
+ return 0, nil, syserror.EINVAL
}
// no_new_privs is assumed to always be set. See
// kernel.Task.updateCredsForExec.
@@ -167,14 +166,14 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
case linux.PR_GET_NO_NEW_PRIVS:
if args[1].Int() != 0 || args[2].Int() != 0 || args[3].Int() != 0 || args[4].Int() != 0 {
- return 0, nil, syscall.EINVAL
+ return 0, nil, syserror.EINVAL
}
return 1, nil, nil
case linux.PR_SET_SECCOMP:
if args[1].Int() != linux.SECCOMP_MODE_FILTER {
// Unsupported mode.
- return 0, nil, syscall.EINVAL
+ return 0, nil, syserror.EINVAL
}
return 0, nil, seccomp(t, linux.SECCOMP_SET_MODE_FILTER, 0, args[2].Pointer())
@@ -185,7 +184,7 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
case linux.PR_CAPBSET_READ:
cp := linux.Capability(args[1].Uint64())
if !cp.Ok() {
- return 0, nil, syscall.EINVAL
+ return 0, nil, syserror.EINVAL
}
var rv uintptr
if auth.CapabilitySetOf(cp)&t.Credentials().BoundingCaps != 0 {
@@ -196,7 +195,7 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
case linux.PR_CAPBSET_DROP:
cp := linux.Capability(args[1].Uint64())
if !cp.Ok() {
- return 0, nil, syscall.EINVAL
+ return 0, nil, syserror.EINVAL
}
return 0, nil, t.DropBoundingCapability(cp)
@@ -221,7 +220,7 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
t.Kernel().EmitUnimplementedEvent(t)
fallthrough
default:
- return 0, nil, syscall.EINVAL
+ return 0, nil, syserror.EINVAL
}
return 0, nil, nil
diff --git a/pkg/sentry/syscalls/linux/sys_random.go b/pkg/sentry/syscalls/linux/sys_random.go
index fc3959a7e..bc4c588bf 100644
--- a/pkg/sentry/syscalls/linux/sys_random.go
+++ b/pkg/sentry/syscalls/linux/sys_random.go
@@ -18,12 +18,12 @@ import (
"io"
"math"
- "gvisor.googlesource.com/gvisor/pkg/rand"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/rand"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/safemem"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
const (
diff --git a/pkg/sentry/syscalls/linux/sys_read.go b/pkg/sentry/syscalls/linux/sys_read.go
index 48b0fd49d..b2474e60d 100644
--- a/pkg/sentry/syscalls/linux/sys_read.go
+++ b/pkg/sentry/syscalls/linux/sys_read.go
@@ -17,16 +17,15 @@ package linux
import (
"time"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/socket"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/waiter"
)
const (
@@ -39,11 +38,11 @@ const (
// they can do large reads all at once. Bug for bug. Same for other read
// calls below.
func Read(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
addr := args[1].Pointer()
size := args[2].SizeT()
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
@@ -75,12 +74,12 @@ func Read(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC
// Pread64 implements linux syscall pread64(2).
func Pread64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
addr := args[1].Pointer()
size := args[2].SizeT()
offset := args[3].Int64()
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
@@ -122,11 +121,11 @@ func Pread64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
// Readv implements linux syscall readv(2).
func Readv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
addr := args[1].Pointer()
iovcnt := int(args[2].Int())
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
@@ -152,12 +151,12 @@ func Readv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
// Preadv implements linux syscall preadv(2).
func Preadv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
addr := args[1].Pointer()
iovcnt := int(args[2].Int())
offset := args[3].Int64()
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
@@ -201,13 +200,13 @@ func Preadv2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
// splits the offset argument into a high/low value for compatibility with
// 32-bit architectures. The flags argument is the 5th argument.
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
addr := args[1].Pointer()
iovcnt := int(args[2].Int())
offset := args[3].Int64()
flags := int(args[5].Int())
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
diff --git a/pkg/sentry/syscalls/linux/sys_rlimit.go b/pkg/sentry/syscalls/linux/sys_rlimit.go
index 8b0379779..51e3f836b 100644
--- a/pkg/sentry/syscalls/linux/sys_rlimit.go
+++ b/pkg/sentry/syscalls/linux/sys_rlimit.go
@@ -15,12 +15,12 @@
package linux
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/limits"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/limits"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// rlimit describes an implementation of 'struct rlimit', which may vary from
diff --git a/pkg/sentry/syscalls/linux/sys_rusage.go b/pkg/sentry/syscalls/linux/sys_rusage.go
index 003d718da..1674c7445 100644
--- a/pkg/sentry/syscalls/linux/sys_rusage.go
+++ b/pkg/sentry/syscalls/linux/sys_rusage.go
@@ -15,12 +15,12 @@
package linux
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/syserror"
)
func getrusage(t *kernel.Task, which int32) linux.Rusage {
diff --git a/pkg/sentry/syscalls/linux/sys_sched.go b/pkg/sentry/syscalls/linux/sys_sched.go
index 8aea03abe..99f6993f5 100644
--- a/pkg/sentry/syscalls/linux/sys_sched.go
+++ b/pkg/sentry/syscalls/linux/sys_sched.go
@@ -15,11 +15,10 @@
package linux
import (
- "syscall"
-
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/syserror"
)
const (
@@ -37,13 +36,13 @@ func SchedGetparam(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel
pid := args[0].Int()
param := args[1].Pointer()
if param == 0 {
- return 0, nil, syscall.EINVAL
+ return 0, nil, syserror.EINVAL
}
if pid < 0 {
- return 0, nil, syscall.EINVAL
+ return 0, nil, syserror.EINVAL
}
if pid != 0 && t.PIDNamespace().TaskWithID(kernel.ThreadID(pid)) == nil {
- return 0, nil, syscall.ESRCH
+ return 0, nil, syserror.ESRCH
}
r := SchedParam{schedPriority: onlyPriority}
if _, err := t.CopyOut(param, r); err != nil {
@@ -57,10 +56,10 @@ func SchedGetparam(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel
func SchedGetscheduler(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
pid := args[0].Int()
if pid < 0 {
- return 0, nil, syscall.EINVAL
+ return 0, nil, syserror.EINVAL
}
if pid != 0 && t.PIDNamespace().TaskWithID(kernel.ThreadID(pid)) == nil {
- return 0, nil, syscall.ESRCH
+ return 0, nil, syserror.ESRCH
}
return onlyScheduler, nil, nil
}
@@ -71,20 +70,20 @@ func SchedSetscheduler(t *kernel.Task, args arch.SyscallArguments) (uintptr, *ke
policy := args[1].Int()
param := args[2].Pointer()
if pid < 0 {
- return 0, nil, syscall.EINVAL
+ return 0, nil, syserror.EINVAL
}
if policy != onlyScheduler {
- return 0, nil, syscall.EINVAL
+ return 0, nil, syserror.EINVAL
}
if pid != 0 && t.PIDNamespace().TaskWithID(kernel.ThreadID(pid)) == nil {
- return 0, nil, syscall.ESRCH
+ return 0, nil, syserror.ESRCH
}
var r SchedParam
if _, err := t.CopyIn(param, &r); err != nil {
- return 0, nil, syscall.EINVAL
+ return 0, nil, syserror.EINVAL
}
if r.schedPriority != onlyPriority {
- return 0, nil, syscall.EINVAL
+ return 0, nil, syserror.EINVAL
}
return 0, nil, nil
}
diff --git a/pkg/sentry/syscalls/linux/sys_seccomp.go b/pkg/sentry/syscalls/linux/sys_seccomp.go
index b4262162a..18510ead8 100644
--- a/pkg/sentry/syscalls/linux/sys_seccomp.go
+++ b/pkg/sentry/syscalls/linux/sys_seccomp.go
@@ -15,13 +15,12 @@
package linux
import (
- "syscall"
-
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/bpf"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/bpf"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// userSockFprog is equivalent to Linux's struct sock_fprog on amd64.
@@ -43,7 +42,7 @@ func seccomp(t *kernel.Task, mode, flags uint64, addr usermem.Addr) error {
// We only support SECCOMP_SET_MODE_FILTER at the moment.
if mode != linux.SECCOMP_SET_MODE_FILTER {
// Unsupported mode.
- return syscall.EINVAL
+ return syserror.EINVAL
}
tsync := flags&linux.SECCOMP_FILTER_FLAG_TSYNC != 0
@@ -51,7 +50,7 @@ func seccomp(t *kernel.Task, mode, flags uint64, addr usermem.Addr) error {
// The only flag we support now is SECCOMP_FILTER_FLAG_TSYNC.
if flags&^linux.SECCOMP_FILTER_FLAG_TSYNC != 0 {
// Unsupported flag.
- return syscall.EINVAL
+ return syserror.EINVAL
}
var fprog userSockFprog
@@ -65,7 +64,7 @@ func seccomp(t *kernel.Task, mode, flags uint64, addr usermem.Addr) error {
compiledFilter, err := bpf.Compile(filter)
if err != nil {
t.Debugf("Invalid seccomp-bpf filter: %v", err)
- return syscall.EINVAL
+ return syserror.EINVAL
}
return t.AppendSyscallFilter(compiledFilter, tsync)
diff --git a/pkg/sentry/syscalls/linux/sys_sem.go b/pkg/sentry/syscalls/linux/sys_sem.go
index 5bd61ab87..cde3b54e7 100644
--- a/pkg/sentry/syscalls/linux/sys_sem.go
+++ b/pkg/sentry/syscalls/linux/sys_sem.go
@@ -17,13 +17,13 @@ package linux
import (
"math"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
const opsMax = 500 // SEMOPM
diff --git a/pkg/sentry/syscalls/linux/sys_shm.go b/pkg/sentry/syscalls/linux/sys_shm.go
index d0eceac7c..d57ffb3a1 100644
--- a/pkg/sentry/syscalls/linux/sys_shm.go
+++ b/pkg/sentry/syscalls/linux/sys_shm.go
@@ -15,11 +15,11 @@
package linux
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/shm"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/shm"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// Shmget implements shmget(2).
diff --git a/pkg/sentry/syscalls/linux/sys_signal.go b/pkg/sentry/syscalls/linux/sys_signal.go
index 7fbeb4fcd..0104a94c0 100644
--- a/pkg/sentry/syscalls/linux/sys_signal.go
+++ b/pkg/sentry/syscalls/linux/sys_signal.go
@@ -18,10 +18,10 @@ import (
"math"
"time"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// "For a process to have permission to send a signal it must
diff --git a/pkg/sentry/syscalls/linux/sys_socket.go b/pkg/sentry/syscalls/linux/sys_socket.go
index 31295a6a9..195734257 100644
--- a/pkg/sentry/syscalls/linux/sys_socket.go
+++ b/pkg/sentry/syscalls/linux/sys_socket.go
@@ -15,21 +15,20 @@
package linux
import (
- "syscall"
"time"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/binary"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/control"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/socket"
+ "gvisor.dev/gvisor/pkg/sentry/socket/control"
+ "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserr"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// minListenBacklog is the minimum reasonable backlog for listening sockets.
@@ -61,6 +60,8 @@ const controlLenOffset = 40
// to the Flags field.
const flagsOffset = 48
+const sizeOfInt32 = 4
+
// messageHeader64Len is the length of a MessageHeader64 struct.
var messageHeader64Len = uint64(binary.Size(MessageHeader64{}))
@@ -129,7 +130,7 @@ func CopyInMessageHeader64(t *kernel.Task, addr usermem.Addr, msg *MessageHeader
// from the untrusted address space range.
func CaptureAddress(t *kernel.Task, addr usermem.Addr, addrlen uint32) ([]byte, error) {
if addrlen > maxAddrLen {
- return nil, syscall.EINVAL
+ return nil, syserror.EINVAL
}
addrBuf := make([]byte, addrlen)
@@ -151,7 +152,7 @@ func writeAddress(t *kernel.Task, addr interface{}, addrLen uint32, addrPtr user
}
if int32(bufLen) < 0 {
- return syscall.EINVAL
+ return syserror.EINVAL
}
// Write the length unconditionally.
@@ -184,7 +185,7 @@ func Socket(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
// Check and initialize the flags.
if stype & ^(0xf|linux.SOCK_NONBLOCK|linux.SOCK_CLOEXEC) != 0 {
- return 0, nil, syscall.EINVAL
+ return 0, nil, syserror.EINVAL
}
// Create the new socket.
@@ -197,9 +198,9 @@ func Socket(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
})
defer s.DecRef()
- fd, err := t.FDMap().NewFDFrom(0, s, kernel.FDFlags{
+ fd, err := t.NewFDFrom(0, s, kernel.FDFlags{
CloseOnExec: stype&linux.SOCK_CLOEXEC != 0,
- }, t.ThreadGroup().Limits())
+ })
if err != nil {
return 0, nil, err
}
@@ -216,15 +217,12 @@ func SocketPair(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy
// Check and initialize the flags.
if stype & ^(0xf|linux.SOCK_NONBLOCK|linux.SOCK_CLOEXEC) != 0 {
- return 0, nil, syscall.EINVAL
+ return 0, nil, syserror.EINVAL
}
fileFlags := fs.SettableFileFlags{
NonBlocking: stype&linux.SOCK_NONBLOCK != 0,
}
- fdFlags := kernel.FDFlags{
- CloseOnExec: stype&linux.SOCK_CLOEXEC != 0,
- }
// Create the socket pair.
s1, s2, e := socket.Pair(t, domain, linux.SockType(stype&0xf), protocol)
@@ -237,20 +235,16 @@ func SocketPair(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy
defer s2.DecRef()
// Create the FDs for the sockets.
- fd1, err := t.FDMap().NewFDFrom(0, s1, fdFlags, t.ThreadGroup().Limits())
- if err != nil {
- return 0, nil, err
- }
- fd2, err := t.FDMap().NewFDFrom(0, s2, fdFlags, t.ThreadGroup().Limits())
+ fds, err := t.NewFDs(0, []*fs.File{s1, s2}, kernel.FDFlags{
+ CloseOnExec: stype&linux.SOCK_CLOEXEC != 0,
+ })
if err != nil {
- t.FDMap().Remove(fd1)
return 0, nil, err
}
// Copy the file descriptors out.
- if _, err := t.CopyOut(socks, []int32{int32(fd1), int32(fd2)}); err != nil {
- t.FDMap().Remove(fd1)
- t.FDMap().Remove(fd2)
+ if _, err := t.CopyOut(socks, fds); err != nil {
+ // Note that we don't close files here; see pipe(2) also.
return 0, nil, err
}
@@ -259,21 +253,21 @@ func SocketPair(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy
// Connect implements the linux syscall connect(2).
func Connect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
addr := args[1].Pointer()
addrlen := args[2].Uint()
// Get socket from the file descriptor.
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
- return 0, nil, syscall.EBADF
+ return 0, nil, syserror.EBADF
}
defer file.DecRef()
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
- return 0, nil, syscall.ENOTSOCK
+ return 0, nil, syserror.ENOTSOCK
}
// Capture address and call syscall implementation.
@@ -288,23 +282,23 @@ func Connect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
// accept is the implementation of the accept syscall. It is called by accept
// and accept4 syscall handlers.
-func accept(t *kernel.Task, fd kdefs.FD, addr usermem.Addr, addrLen usermem.Addr, flags int) (uintptr, error) {
+func accept(t *kernel.Task, fd int32, addr usermem.Addr, addrLen usermem.Addr, flags int) (uintptr, error) {
// Check that no unsupported flags are passed in.
if flags & ^(linux.SOCK_NONBLOCK|linux.SOCK_CLOEXEC) != 0 {
- return 0, syscall.EINVAL
+ return 0, syserror.EINVAL
}
// Get socket from the file descriptor.
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
- return 0, syscall.EBADF
+ return 0, syserror.EBADF
}
defer file.DecRef()
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
- return 0, syscall.ENOTSOCK
+ return 0, syserror.ENOTSOCK
}
// Call the syscall implementation for this socket, then copy the
@@ -319,7 +313,7 @@ func accept(t *kernel.Task, fd kdefs.FD, addr usermem.Addr, addrLen usermem.Addr
if peerRequested {
// NOTE(magi): Linux does not give you an error if it can't
// write the data back out so neither do we.
- if err := writeAddress(t, peer, peerLen, addr, addrLen); err == syscall.EINVAL {
+ if err := writeAddress(t, peer, peerLen, addr, addrLen); err == syserror.EINVAL {
return 0, err
}
}
@@ -328,7 +322,7 @@ func accept(t *kernel.Task, fd kdefs.FD, addr usermem.Addr, addrLen usermem.Addr
// Accept4 implements the linux syscall accept4(2).
func Accept4(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
addr := args[1].Pointer()
addrlen := args[2].Pointer()
flags := int(args[3].Int())
@@ -339,7 +333,7 @@ func Accept4(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
// Accept implements the linux syscall accept(2).
func Accept(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
addr := args[1].Pointer()
addrlen := args[2].Pointer()
@@ -349,21 +343,21 @@ func Accept(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
// Bind implements the linux syscall bind(2).
func Bind(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
addr := args[1].Pointer()
addrlen := args[2].Uint()
// Get socket from the file descriptor.
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
- return 0, nil, syscall.EBADF
+ return 0, nil, syserror.EBADF
}
defer file.DecRef()
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
- return 0, nil, syscall.ENOTSOCK
+ return 0, nil, syserror.ENOTSOCK
}
// Capture address and call syscall implementation.
@@ -377,20 +371,20 @@ func Bind(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC
// Listen implements the linux syscall listen(2).
func Listen(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
backlog := args[1].Int()
// Get socket from the file descriptor.
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
- return 0, nil, syscall.EBADF
+ return 0, nil, syserror.EBADF
}
defer file.DecRef()
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
- return 0, nil, syscall.ENOTSOCK
+ return 0, nil, syserror.ENOTSOCK
}
// Per Linux, the backlog is silently capped to reasonable values.
@@ -406,27 +400,27 @@ func Listen(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
// Shutdown implements the linux syscall shutdown(2).
func Shutdown(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
how := args[1].Int()
// Get socket from the file descriptor.
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
- return 0, nil, syscall.EBADF
+ return 0, nil, syserror.EBADF
}
defer file.DecRef()
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
- return 0, nil, syscall.ENOTSOCK
+ return 0, nil, syserror.ENOTSOCK
}
// Validate how, then call syscall implementation.
switch how {
case linux.SHUT_RD, linux.SHUT_WR, linux.SHUT_RDWR:
default:
- return 0, nil, syscall.EINVAL
+ return 0, nil, syserror.EINVAL
}
return 0, nil, s.Shutdown(t, int(how)).ToError()
@@ -434,23 +428,23 @@ func Shutdown(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
// GetSockOpt implements the linux syscall getsockopt(2).
func GetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
level := args[1].Int()
name := args[2].Int()
optValAddr := args[3].Pointer()
optLenAddr := args[4].Pointer()
// Get socket from the file descriptor.
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
- return 0, nil, syscall.EBADF
+ return 0, nil, syserror.EBADF
}
defer file.DecRef()
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
- return 0, nil, syscall.ENOTSOCK
+ return 0, nil, syserror.ENOTSOCK
}
// Read the length if present. Reject negative values.
@@ -461,12 +455,12 @@ func GetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy
}
if optLen < 0 {
- return 0, nil, syscall.EINVAL
+ return 0, nil, syserror.EINVAL
}
}
// Call syscall implementation then copy both value and value len out.
- v, e := s.GetSockOpt(t, int(level), int(name), int(optLen))
+ v, e := getSockOpt(t, s, int(level), int(name), int(optLen))
if e != nil {
return 0, nil, e.ToError()
}
@@ -487,34 +481,61 @@ func GetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy
return 0, nil, nil
}
+// getSockOpt tries to handle common socket options, or dispatches to a specific
+// socket implementation.
+func getSockOpt(t *kernel.Task, s socket.Socket, level, name, len int) (interface{}, *syserr.Error) {
+ if level == linux.SOL_SOCKET {
+ switch name {
+ case linux.SO_TYPE, linux.SO_DOMAIN, linux.SO_PROTOCOL:
+ if len < sizeOfInt32 {
+ return nil, syserr.ErrInvalidArgument
+ }
+ }
+
+ switch name {
+ case linux.SO_TYPE:
+ _, skType, _ := s.Type()
+ return int32(skType), nil
+ case linux.SO_DOMAIN:
+ family, _, _ := s.Type()
+ return int32(family), nil
+ case linux.SO_PROTOCOL:
+ _, _, protocol := s.Type()
+ return int32(protocol), nil
+ }
+ }
+
+ return s.GetSockOpt(t, level, name, len)
+}
+
// SetSockOpt implements the linux syscall setsockopt(2).
//
// Note that unlike Linux, enabling SO_PASSCRED does not autobind the socket.
func SetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
level := args[1].Int()
name := args[2].Int()
optValAddr := args[3].Pointer()
optLen := args[4].Int()
// Get socket from the file descriptor.
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
- return 0, nil, syscall.EBADF
+ return 0, nil, syserror.EBADF
}
defer file.DecRef()
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
- return 0, nil, syscall.ENOTSOCK
+ return 0, nil, syserror.ENOTSOCK
}
if optLen <= 0 {
- return 0, nil, syscall.EINVAL
+ return 0, nil, syserror.EINVAL
}
if optLen > maxOptLen {
- return 0, nil, syscall.EINVAL
+ return 0, nil, syserror.EINVAL
}
buf := t.CopyScratchBuffer(int(optLen))
if _, err := t.CopyIn(optValAddr, &buf); err != nil {
@@ -531,21 +552,21 @@ func SetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy
// GetSockName implements the linux syscall getsockname(2).
func GetSockName(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
addr := args[1].Pointer()
addrlen := args[2].Pointer()
// Get socket from the file descriptor.
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
- return 0, nil, syscall.EBADF
+ return 0, nil, syserror.EBADF
}
defer file.DecRef()
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
- return 0, nil, syscall.ENOTSOCK
+ return 0, nil, syserror.ENOTSOCK
}
// Get the socket name and copy it to the caller.
@@ -559,21 +580,21 @@ func GetSockName(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S
// GetPeerName implements the linux syscall getpeername(2).
func GetPeerName(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
addr := args[1].Pointer()
addrlen := args[2].Pointer()
// Get socket from the file descriptor.
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
- return 0, nil, syscall.EBADF
+ return 0, nil, syserror.EBADF
}
defer file.DecRef()
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
- return 0, nil, syscall.ENOTSOCK
+ return 0, nil, syserror.ENOTSOCK
}
// Get the socket peer name and copy it to the caller.
@@ -587,31 +608,31 @@ func GetPeerName(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S
// RecvMsg implements the linux syscall recvmsg(2).
func RecvMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
msgPtr := args[1].Pointer()
flags := args[2].Int()
if t.Arch().Width() != 8 {
// We only handle 64-bit for now.
- return 0, nil, syscall.EINVAL
+ return 0, nil, syserror.EINVAL
}
// Get socket from the file descriptor.
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
- return 0, nil, syscall.EBADF
+ return 0, nil, syserror.EBADF
}
defer file.DecRef()
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
- return 0, nil, syscall.ENOTSOCK
+ return 0, nil, syserror.ENOTSOCK
}
// Reject flags that we don't handle yet.
if flags & ^(baseRecvFlags|linux.MSG_PEEK|linux.MSG_CMSG_CLOEXEC|linux.MSG_ERRQUEUE) != 0 {
- return 0, nil, syscall.EINVAL
+ return 0, nil, syserror.EINVAL
}
if file.Flags().NonBlocking {
@@ -633,7 +654,7 @@ func RecvMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
// RecvMMsg implements the linux syscall recvmmsg(2).
func RecvMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
msgPtr := args[1].Pointer()
vlen := args[2].Uint()
flags := args[3].Int()
@@ -641,25 +662,25 @@ func RecvMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
if t.Arch().Width() != 8 {
// We only handle 64-bit for now.
- return 0, nil, syscall.EINVAL
+ return 0, nil, syserror.EINVAL
}
// Reject flags that we don't handle yet.
if flags & ^(baseRecvFlags|linux.MSG_CMSG_CLOEXEC|linux.MSG_ERRQUEUE) != 0 {
- return 0, nil, syscall.EINVAL
+ return 0, nil, syserror.EINVAL
}
// Get socket from the file descriptor.
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
- return 0, nil, syscall.EBADF
+ return 0, nil, syserror.EBADF
}
defer file.DecRef()
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
- return 0, nil, syscall.ENOTSOCK
+ return 0, nil, syserror.ENOTSOCK
}
if file.Flags().NonBlocking {
@@ -674,7 +695,7 @@ func RecvMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
return 0, nil, err
}
if !ts.Valid() {
- return 0, nil, syscall.EINVAL
+ return 0, nil, syserror.EINVAL
}
deadline = t.Kernel().MonotonicClock().Now().Add(ts.ToDuration())
haveDeadline = true
@@ -694,7 +715,7 @@ func RecvMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
for i := uint64(0); i < uint64(vlen); i++ {
mp, ok := msgPtr.AddLength(i * multipleMessageHeader64Len)
if !ok {
- return 0, nil, syscall.EFAULT
+ return 0, nil, syserror.EFAULT
}
var n uintptr
if n, err = recvSingleMsg(t, s, mp, flags, haveDeadline, deadline); err != nil {
@@ -704,7 +725,7 @@ func RecvMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
// Copy the received length to the caller.
lp, ok := mp.AddLength(messageHeader64Len)
if !ok {
- return 0, nil, syscall.EFAULT
+ return 0, nil, syserror.EFAULT
}
if _, err = t.CopyOut(lp, uint32(n)); err != nil {
break
@@ -726,7 +747,7 @@ func recvSingleMsg(t *kernel.Task, s socket.Socket, msgPtr usermem.Addr, flags i
}
if msg.IovLen > linux.UIO_MAXIOV {
- return 0, syscall.EMSGSIZE
+ return 0, syserror.EMSGSIZE
}
dst, err := t.IovecsIOSequence(usermem.Addr(msg.Iov), int(msg.IovLen), usermem.IOOpts{
AddressSpaceActive: true,
@@ -737,7 +758,7 @@ func recvSingleMsg(t *kernel.Task, s socket.Socket, msgPtr usermem.Addr, flags i
// FIXME(b/63594852): Pretend we have an empty error queue.
if flags&linux.MSG_ERRQUEUE != 0 {
- return 0, syscall.EAGAIN
+ return 0, syserror.EAGAIN
}
// Fast path when no control message nor name buffers are provided.
@@ -762,7 +783,7 @@ func recvSingleMsg(t *kernel.Task, s socket.Socket, msgPtr usermem.Addr, flags i
}
if msg.ControlLen > maxControlLen {
- return 0, syscall.ENOBUFS
+ return 0, syserror.ENOBUFS
}
n, mflags, sender, senderLen, cms, e := s.RecvMsg(t, dst, int(flags), haveDeadline, deadline, msg.NameLen != 0, msg.ControlLen)
if e != nil {
@@ -812,27 +833,27 @@ func recvSingleMsg(t *kernel.Task, s socket.Socket, msgPtr usermem.Addr, flags i
// recvFrom is the implementation of the recvfrom syscall. It is called by
// recvfrom and recv syscall handlers.
-func recvFrom(t *kernel.Task, fd kdefs.FD, bufPtr usermem.Addr, bufLen uint64, flags int32, namePtr usermem.Addr, nameLenPtr usermem.Addr) (uintptr, error) {
+func recvFrom(t *kernel.Task, fd int32, bufPtr usermem.Addr, bufLen uint64, flags int32, namePtr usermem.Addr, nameLenPtr usermem.Addr) (uintptr, error) {
if int(bufLen) < 0 {
- return 0, syscall.EINVAL
+ return 0, syserror.EINVAL
}
// Reject flags that we don't handle yet.
if flags & ^(baseRecvFlags|linux.MSG_PEEK|linux.MSG_CONFIRM) != 0 {
- return 0, syscall.EINVAL
+ return 0, syserror.EINVAL
}
// Get socket from the file descriptor.
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
- return 0, syscall.EBADF
+ return 0, syserror.EBADF
}
defer file.DecRef()
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
- return 0, syscall.ENOTSOCK
+ return 0, syserror.ENOTSOCK
}
if file.Flags().NonBlocking {
@@ -873,7 +894,7 @@ func recvFrom(t *kernel.Task, fd kdefs.FD, bufPtr usermem.Addr, bufLen uint64, f
// RecvFrom implements the linux syscall recvfrom(2).
func RecvFrom(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
bufPtr := args[1].Pointer()
bufLen := args[2].Uint64()
flags := args[3].Int()
@@ -886,31 +907,31 @@ func RecvFrom(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
// SendMsg implements the linux syscall sendmsg(2).
func SendMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
msgPtr := args[1].Pointer()
flags := args[2].Int()
if t.Arch().Width() != 8 {
// We only handle 64-bit for now.
- return 0, nil, syscall.EINVAL
+ return 0, nil, syserror.EINVAL
}
// Get socket from the file descriptor.
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
- return 0, nil, syscall.EBADF
+ return 0, nil, syserror.EBADF
}
defer file.DecRef()
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
- return 0, nil, syscall.ENOTSOCK
+ return 0, nil, syserror.ENOTSOCK
}
// Reject flags that we don't handle yet.
if flags & ^(linux.MSG_DONTWAIT|linux.MSG_EOR|linux.MSG_MORE|linux.MSG_NOSIGNAL) != 0 {
- return 0, nil, syscall.EINVAL
+ return 0, nil, syserror.EINVAL
}
if file.Flags().NonBlocking {
@@ -923,32 +944,32 @@ func SendMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
// SendMMsg implements the linux syscall sendmmsg(2).
func SendMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
msgPtr := args[1].Pointer()
vlen := args[2].Uint()
flags := args[3].Int()
if t.Arch().Width() != 8 {
// We only handle 64-bit for now.
- return 0, nil, syscall.EINVAL
+ return 0, nil, syserror.EINVAL
}
// Get socket from the file descriptor.
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
- return 0, nil, syscall.EBADF
+ return 0, nil, syserror.EBADF
}
defer file.DecRef()
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
- return 0, nil, syscall.ENOTSOCK
+ return 0, nil, syserror.ENOTSOCK
}
// Reject flags that we don't handle yet.
if flags & ^(linux.MSG_DONTWAIT|linux.MSG_EOR|linux.MSG_MORE|linux.MSG_NOSIGNAL) != 0 {
- return 0, nil, syscall.EINVAL
+ return 0, nil, syserror.EINVAL
}
if file.Flags().NonBlocking {
@@ -960,7 +981,7 @@ func SendMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
for i := uint64(0); i < uint64(vlen); i++ {
mp, ok := msgPtr.AddLength(i * multipleMessageHeader64Len)
if !ok {
- return 0, nil, syscall.EFAULT
+ return 0, nil, syserror.EFAULT
}
var n uintptr
if n, err = sendSingleMsg(t, s, file, mp, flags); err != nil {
@@ -970,7 +991,7 @@ func SendMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
// Copy the received length to the caller.
lp, ok := mp.AddLength(messageHeader64Len)
if !ok {
- return 0, nil, syscall.EFAULT
+ return 0, nil, syserror.EFAULT
}
if _, err = t.CopyOut(lp, uint32(n)); err != nil {
break
@@ -995,7 +1016,7 @@ func sendSingleMsg(t *kernel.Task, s socket.Socket, file *fs.File, msgPtr userme
if msg.ControlLen > 0 {
// Put an upper bound to prevent large allocations.
if msg.ControlLen > maxControlLen {
- return 0, syscall.ENOBUFS
+ return 0, syserror.ENOBUFS
}
controlData = make([]byte, msg.ControlLen)
if _, err := t.CopyIn(usermem.Addr(msg.Control), &controlData); err != nil {
@@ -1015,7 +1036,7 @@ func sendSingleMsg(t *kernel.Task, s socket.Socket, file *fs.File, msgPtr userme
// Read data then call the sendmsg implementation.
if msg.IovLen > linux.UIO_MAXIOV {
- return 0, syscall.EMSGSIZE
+ return 0, syserror.EMSGSIZE
}
src, err := t.IovecsIOSequence(usermem.Addr(msg.Iov), int(msg.IovLen), usermem.IOOpts{
AddressSpaceActive: true,
@@ -1049,23 +1070,23 @@ func sendSingleMsg(t *kernel.Task, s socket.Socket, file *fs.File, msgPtr userme
// sendTo is the implementation of the sendto syscall. It is called by sendto
// and send syscall handlers.
-func sendTo(t *kernel.Task, fd kdefs.FD, bufPtr usermem.Addr, bufLen uint64, flags int32, namePtr usermem.Addr, nameLen uint32) (uintptr, error) {
+func sendTo(t *kernel.Task, fd int32, bufPtr usermem.Addr, bufLen uint64, flags int32, namePtr usermem.Addr, nameLen uint32) (uintptr, error) {
bl := int(bufLen)
if bl < 0 {
- return 0, syscall.EINVAL
+ return 0, syserror.EINVAL
}
// Get socket from the file descriptor.
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
- return 0, syscall.EBADF
+ return 0, syserror.EBADF
}
defer file.DecRef()
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
- return 0, syscall.ENOTSOCK
+ return 0, syserror.ENOTSOCK
}
if file.Flags().NonBlocking {
@@ -1105,7 +1126,7 @@ func sendTo(t *kernel.Task, fd kdefs.FD, bufPtr usermem.Addr, bufLen uint64, fla
// SendTo implements the linux syscall sendto(2).
func SendTo(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
bufPtr := args[1].Pointer()
bufLen := args[2].Uint64()
flags := args[3].Int()
diff --git a/pkg/sentry/syscalls/linux/sys_splice.go b/pkg/sentry/syscalls/linux/sys_splice.go
index 37303606f..a7c98efcb 100644
--- a/pkg/sentry/syscalls/linux/sys_splice.go
+++ b/pkg/sentry/syscalls/linux/sys_splice.go
@@ -15,13 +15,12 @@
package linux
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// doSplice implements a blocking splice operation.
@@ -48,12 +47,12 @@ func doSplice(t *kernel.Task, outFile, inFile *fs.File, opts fs.SpliceOpts, nonB
if ch == nil {
ch = make(chan struct{}, 1)
}
- if !inW && inFile.Readiness(EventMaskRead) == 0 && !inFile.Flags().NonBlocking {
+ if !inW && !inFile.Flags().NonBlocking {
w, _ := waiter.NewChannelEntry(ch)
inFile.EventRegister(&w, EventMaskRead)
defer inFile.EventUnregister(&w)
inW = true // Registered.
- } else if !outW && outFile.Readiness(EventMaskWrite) == 0 && !outFile.Flags().NonBlocking {
+ } else if !outW && !outFile.Flags().NonBlocking {
w, _ := waiter.NewChannelEntry(ch)
outFile.EventRegister(&w, EventMaskWrite)
defer outFile.EventUnregister(&w)
@@ -65,6 +64,11 @@ func doSplice(t *kernel.Task, outFile, inFile *fs.File, opts fs.SpliceOpts, nonB
break
}
+ if (!inW || inFile.Readiness(EventMaskRead) != 0) && (!outW || outFile.Readiness(EventMaskWrite) != 0) {
+ // Something became ready, try again without blocking.
+ continue
+ }
+
// Block until there's data.
if err = t.Block(ch); err != nil {
break
@@ -76,8 +80,8 @@ func doSplice(t *kernel.Task, outFile, inFile *fs.File, opts fs.SpliceOpts, nonB
// Sendfile implements linux system call sendfile(2).
func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- outFD := kdefs.FD(args[0].Int())
- inFD := kdefs.FD(args[1].Int())
+ outFD := args[0].Int()
+ inFD := args[1].Int()
offsetAddr := args[2].Pointer()
count := int64(args[3].SizeT())
@@ -87,13 +91,13 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
}
// Get files.
- outFile := t.FDMap().GetFile(outFD)
+ outFile := t.GetFile(outFD)
if outFile == nil {
return 0, nil, syserror.EBADF
}
defer outFile.DecRef()
- inFile := t.FDMap().GetFile(inFD)
+ inFile := t.GetFile(inFD)
if inFile == nil {
return 0, nil, syserror.EBADF
}
@@ -158,9 +162,9 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
// Splice implements splice(2).
func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- inFD := kdefs.FD(args[0].Int())
+ inFD := args[0].Int()
inOffset := args[1].Pointer()
- outFD := kdefs.FD(args[2].Int())
+ outFD := args[2].Int()
outOffset := args[3].Pointer()
count := int64(args[4].SizeT())
flags := args[5].Int()
@@ -177,13 +181,13 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
nonBlocking := (flags & linux.SPLICE_F_NONBLOCK) != 0
// Get files.
- outFile := t.FDMap().GetFile(outFD)
+ outFile := t.GetFile(outFD)
if outFile == nil {
return 0, nil, syserror.EBADF
}
defer outFile.DecRef()
- inFile := t.FDMap().GetFile(inFD)
+ inFile := t.GetFile(inFD)
if inFile == nil {
return 0, nil, syserror.EBADF
}
@@ -246,8 +250,8 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
// Tee imlements tee(2).
func Tee(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- inFD := kdefs.FD(args[0].Int())
- outFD := kdefs.FD(args[1].Int())
+ inFD := args[0].Int()
+ outFD := args[1].Int()
count := int64(args[2].SizeT())
flags := args[3].Int()
@@ -260,13 +264,13 @@ func Tee(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallCo
nonBlocking := (flags & linux.SPLICE_F_NONBLOCK) != 0
// Get files.
- outFile := t.FDMap().GetFile(outFD)
+ outFile := t.GetFile(outFD)
if outFile == nil {
return 0, nil, syserror.EBADF
}
defer outFile.DecRef()
- inFile := t.FDMap().GetFile(inFD)
+ inFile := t.GetFile(inFD)
if inFile == nil {
return 0, nil, syserror.EBADF
}
diff --git a/pkg/sentry/syscalls/linux/sys_stat.go b/pkg/sentry/syscalls/linux/sys_stat.go
index 10fc201ef..5556bc276 100644
--- a/pkg/sentry/syscalls/linux/sys_stat.go
+++ b/pkg/sentry/syscalls/linux/sys_stat.go
@@ -15,14 +15,13 @@
package linux
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/binary"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// Stat implements linux syscall stat(2).
@@ -35,14 +34,14 @@ func Stat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC
return 0, nil, err
}
- return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error {
+ return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error {
return stat(t, d, dirPath, statAddr)
})
}
// Fstatat implements linux syscall newfstatat, i.e. fstatat(2).
func Fstatat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
addr := args[1].Pointer()
statAddr := args[2].Pointer()
flags := args[3].Int()
@@ -54,7 +53,7 @@ func Fstatat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
if path == "" {
// Annoying. What's wrong with fstat?
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
@@ -67,7 +66,7 @@ func Fstatat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
// then we must resolve the final component.
resolve := dirPath || flags&linux.AT_SYMLINK_NOFOLLOW == 0
- return 0, nil, fileOpOn(t, fd, path, resolve, func(root *fs.Dirent, d *fs.Dirent) error {
+ return 0, nil, fileOpOn(t, fd, path, resolve, func(root *fs.Dirent, d *fs.Dirent, _ uint) error {
return stat(t, d, dirPath, statAddr)
})
}
@@ -86,17 +85,17 @@ func Lstat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
// want to resolve the final component.
resolve := dirPath
- return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, resolve, func(root *fs.Dirent, d *fs.Dirent) error {
+ return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, resolve, func(root *fs.Dirent, d *fs.Dirent, _ uint) error {
return stat(t, d, dirPath, statAddr)
})
}
// Fstat implements linux syscall fstat(2).
func Fstat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
statAddr := args[1].Pointer()
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
@@ -132,24 +131,6 @@ func fstat(t *kernel.Task, f *fs.File, statAddr usermem.Addr) error {
// t.CopyObjectOut has noticeable performance impact due to its many slice
// allocations and use of reflection.
func copyOutStat(t *kernel.Task, dst usermem.Addr, sattr fs.StableAttr, uattr fs.UnstableAttr) error {
- var mode uint32
- switch sattr.Type {
- case fs.RegularFile, fs.SpecialFile:
- mode |= linux.ModeRegular
- case fs.Symlink:
- mode |= linux.ModeSymlink
- case fs.Directory, fs.SpecialDirectory:
- mode |= linux.ModeDirectory
- case fs.Pipe:
- mode |= linux.ModeNamedPipe
- case fs.CharacterDevice:
- mode |= linux.ModeCharacterDevice
- case fs.BlockDevice:
- mode |= linux.ModeBlockDevice
- case fs.Socket:
- mode |= linux.ModeSocket
- }
-
b := t.CopyScratchBuffer(int(linux.SizeOfStat))[:0]
// Dev (uint64)
@@ -159,7 +140,7 @@ func copyOutStat(t *kernel.Task, dst usermem.Addr, sattr fs.StableAttr, uattr fs
// Nlink (uint64)
b = binary.AppendUint64(b, usermem.ByteOrder, uattr.Links)
// Mode (uint32)
- b = binary.AppendUint32(b, usermem.ByteOrder, mode|uint32(uattr.Perms.LinuxMode()))
+ b = binary.AppendUint32(b, usermem.ByteOrder, sattr.Type.LinuxType()|uint32(uattr.Perms.LinuxMode()))
// UID (uint32)
b = binary.AppendUint32(b, usermem.ByteOrder, uint32(uattr.Owner.UID.In(t.UserNamespace()).OrOverflow()))
// GID (uint32)
@@ -194,6 +175,98 @@ func copyOutStat(t *kernel.Task, dst usermem.Addr, sattr fs.StableAttr, uattr fs
return err
}
+// Statx implements linux syscall statx(2).
+func Statx(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+ fd := args[0].Int()
+ pathAddr := args[1].Pointer()
+ flags := args[2].Int()
+ mask := args[3].Uint()
+ statxAddr := args[4].Pointer()
+
+ if mask&linux.STATX__RESERVED > 0 {
+ return 0, nil, syserror.EINVAL
+ }
+ if flags&linux.AT_STATX_SYNC_TYPE == linux.AT_STATX_SYNC_TYPE {
+ return 0, nil, syserror.EINVAL
+ }
+
+ path, dirPath, err := copyInPath(t, pathAddr, flags&linux.AT_EMPTY_PATH != 0)
+ if err != nil {
+ return 0, nil, err
+ }
+
+ if path == "" {
+ file := t.GetFile(fd)
+ if file == nil {
+ return 0, nil, syserror.EBADF
+ }
+ defer file.DecRef()
+ uattr, err := file.UnstableAttr(t)
+ if err != nil {
+ return 0, nil, err
+ }
+ return 0, nil, statx(t, file.Dirent.Inode.StableAttr, uattr, statxAddr)
+ }
+
+ resolve := dirPath || flags&linux.AT_SYMLINK_NOFOLLOW == 0
+
+ return 0, nil, fileOpOn(t, fd, path, resolve, func(root *fs.Dirent, d *fs.Dirent, _ uint) error {
+ if dirPath && !fs.IsDir(d.Inode.StableAttr) {
+ return syserror.ENOTDIR
+ }
+ uattr, err := d.Inode.UnstableAttr(t)
+ if err != nil {
+ return err
+ }
+ return statx(t, d.Inode.StableAttr, uattr, statxAddr)
+ })
+}
+
+func statx(t *kernel.Task, sattr fs.StableAttr, uattr fs.UnstableAttr, statxAddr usermem.Addr) error {
+ // "[T]he kernel may return fields that weren't requested and may fail to
+ // return fields that were requested, depending on what the backing
+ // filesystem supports.
+ // [...]
+ // A filesystem may also fill in fields that the caller didn't ask for
+ // if it has values for them available and the information is available
+ // at no extra cost. If this happens, the corresponding bits will be
+ // set in stx_mask." -- statx(2)
+ //
+ // We fill in all the values we have (which currently does not include
+ // btime, see b/135608823), regardless of what the user asked for. The
+ // STATX_BASIC_STATS mask indicates that all fields are present except
+ // for btime.
+
+ devMajor, devMinor := linux.DecodeDeviceID(uint32(sattr.DeviceID))
+ s := linux.Statx{
+ // TODO(b/135608823): Support btime, and then change this to
+ // STATX_ALL to indicate presence of btime.
+ Mask: linux.STATX_BASIC_STATS,
+
+ // No attributes, and none supported.
+ Attributes: 0,
+ AttributesMask: 0,
+
+ Blksize: uint32(sattr.BlockSize),
+ Nlink: uint32(uattr.Links),
+ UID: uint32(uattr.Owner.UID.In(t.UserNamespace()).OrOverflow()),
+ GID: uint32(uattr.Owner.GID.In(t.UserNamespace()).OrOverflow()),
+ Mode: uint16(sattr.Type.LinuxType()) | uint16(uattr.Perms.LinuxMode()),
+ Ino: sattr.InodeID,
+ Size: uint64(uattr.Size),
+ Blocks: uint64(uattr.Usage) / 512,
+ Atime: uattr.AccessTime.StatxTimestamp(),
+ Ctime: uattr.StatusChangeTime.StatxTimestamp(),
+ Mtime: uattr.ModificationTime.StatxTimestamp(),
+ RdevMajor: uint32(sattr.DeviceFileMajor),
+ RdevMinor: sattr.DeviceFileMinor,
+ DevMajor: uint32(devMajor),
+ DevMinor: devMinor,
+ }
+ _, err := t.CopyOut(statxAddr, &s)
+ return err
+}
+
// Statfs implements linux syscall statfs(2).
func Statfs(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
addr := args[0].Pointer()
@@ -204,17 +277,17 @@ func Statfs(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
return 0, nil, err
}
- return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error {
+ return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error {
return statfsImpl(t, d, statfsAddr)
})
}
// Fstatfs implements linux syscall fstatfs(2).
func Fstatfs(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
statfsAddr := args[1].Pointer()
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
@@ -252,8 +325,6 @@ func statfsImpl(t *kernel.Task, d *fs.Dirent, addr usermem.Addr) error {
FragmentSize: d.Inode.StableAttr.BlockSize,
// Leave other fields 0 like simple_statfs does.
}
- if _, err := t.CopyOut(addr, &statfs); err != nil {
- return err
- }
- return nil
+ _, err = t.CopyOut(addr, &statfs)
+ return err
}
diff --git a/pkg/sentry/syscalls/linux/sys_sync.go b/pkg/sentry/syscalls/linux/sys_sync.go
index 4352482fb..3e55235bd 100644
--- a/pkg/sentry/syscalls/linux/sys_sync.go
+++ b/pkg/sentry/syscalls/linux/sys_sync.go
@@ -15,12 +15,11 @@
package linux
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// Sync implements linux system call sync(2).
@@ -32,9 +31,9 @@ func Sync(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC
// Syncfs implements linux system call syncfs(2).
func Syncfs(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
@@ -47,9 +46,9 @@ func Syncfs(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
// Fsync implements linux syscall fsync(2).
func Fsync(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
@@ -63,9 +62,9 @@ func Fsync(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
//
// At the moment, it just calls Fsync, which is a big hammer, but correct.
func Fdatasync(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
@@ -79,6 +78,7 @@ func Fdatasync(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
func SyncFileRange(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
var err error
+ fd := args[0].Int()
offset := args[1].Int64()
nbytes := args[2].Int64()
uflags := args[3].Uint()
@@ -97,8 +97,7 @@ func SyncFileRange(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel
nbytes = fs.FileMaxOffset
}
- fd := kdefs.FD(args[0].Int())
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
diff --git a/pkg/sentry/syscalls/linux/sys_sysinfo.go b/pkg/sentry/syscalls/linux/sys_sysinfo.go
index ecf88edc1..a65b560c8 100644
--- a/pkg/sentry/syscalls/linux/sys_sysinfo.go
+++ b/pkg/sentry/syscalls/linux/sys_sysinfo.go
@@ -15,10 +15,10 @@
package linux
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
)
// Sysinfo implements the sysinfo syscall as described in man 2 sysinfo.
diff --git a/pkg/sentry/syscalls/linux/sys_syslog.go b/pkg/sentry/syscalls/linux/sys_syslog.go
index 9efc58d34..40c8bb061 100644
--- a/pkg/sentry/syscalls/linux/sys_syslog.go
+++ b/pkg/sentry/syscalls/linux/sys_syslog.go
@@ -15,9 +15,9 @@
package linux
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/syserror"
)
const (
diff --git a/pkg/sentry/syscalls/linux/sys_thread.go b/pkg/sentry/syscalls/linux/sys_thread.go
index 26f7e8ead..595eb9155 100644
--- a/pkg/sentry/syscalls/linux/sys_thread.go
+++ b/pkg/sentry/syscalls/linux/sys_thread.go
@@ -17,12 +17,12 @@ package linux
import (
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/sched"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/sched"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
const (
@@ -123,29 +123,29 @@ func ExitGroup(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
func clone(t *kernel.Task, flags int, stack usermem.Addr, parentTID usermem.Addr, childTID usermem.Addr, tls usermem.Addr) (uintptr, *kernel.SyscallControl, error) {
opts := kernel.CloneOptions{
SharingOptions: kernel.SharingOptions{
- NewAddressSpace: flags&syscall.CLONE_VM == 0,
- NewSignalHandlers: flags&syscall.CLONE_SIGHAND == 0,
- NewThreadGroup: flags&syscall.CLONE_THREAD == 0,
+ NewAddressSpace: flags&linux.CLONE_VM == 0,
+ NewSignalHandlers: flags&linux.CLONE_SIGHAND == 0,
+ NewThreadGroup: flags&linux.CLONE_THREAD == 0,
TerminationSignal: linux.Signal(flags & exitSignalMask),
- NewPIDNamespace: flags&syscall.CLONE_NEWPID == syscall.CLONE_NEWPID,
- NewUserNamespace: flags&syscall.CLONE_NEWUSER == syscall.CLONE_NEWUSER,
- NewNetworkNamespace: flags&syscall.CLONE_NEWNET == syscall.CLONE_NEWNET,
- NewFiles: flags&syscall.CLONE_FILES == 0,
- NewFSContext: flags&syscall.CLONE_FS == 0,
- NewUTSNamespace: flags&syscall.CLONE_NEWUTS == syscall.CLONE_NEWUTS,
- NewIPCNamespace: flags&syscall.CLONE_NEWIPC == syscall.CLONE_NEWIPC,
+ NewPIDNamespace: flags&linux.CLONE_NEWPID == linux.CLONE_NEWPID,
+ NewUserNamespace: flags&linux.CLONE_NEWUSER == linux.CLONE_NEWUSER,
+ NewNetworkNamespace: flags&linux.CLONE_NEWNET == linux.CLONE_NEWNET,
+ NewFiles: flags&linux.CLONE_FILES == 0,
+ NewFSContext: flags&linux.CLONE_FS == 0,
+ NewUTSNamespace: flags&linux.CLONE_NEWUTS == linux.CLONE_NEWUTS,
+ NewIPCNamespace: flags&linux.CLONE_NEWIPC == linux.CLONE_NEWIPC,
},
Stack: stack,
- SetTLS: flags&syscall.CLONE_SETTLS == syscall.CLONE_SETTLS,
+ SetTLS: flags&linux.CLONE_SETTLS == linux.CLONE_SETTLS,
TLS: tls,
- ChildClearTID: flags&syscall.CLONE_CHILD_CLEARTID == syscall.CLONE_CHILD_CLEARTID,
- ChildSetTID: flags&syscall.CLONE_CHILD_SETTID == syscall.CLONE_CHILD_SETTID,
+ ChildClearTID: flags&linux.CLONE_CHILD_CLEARTID == linux.CLONE_CHILD_CLEARTID,
+ ChildSetTID: flags&linux.CLONE_CHILD_SETTID == linux.CLONE_CHILD_SETTID,
ChildTID: childTID,
- ParentSetTID: flags&syscall.CLONE_PARENT_SETTID == syscall.CLONE_PARENT_SETTID,
+ ParentSetTID: flags&linux.CLONE_PARENT_SETTID == linux.CLONE_PARENT_SETTID,
ParentTID: parentTID,
- Vfork: flags&syscall.CLONE_VFORK == syscall.CLONE_VFORK,
- Untraced: flags&syscall.CLONE_UNTRACED == syscall.CLONE_UNTRACED,
- InheritTracer: flags&syscall.CLONE_PTRACE == syscall.CLONE_PTRACE,
+ Vfork: flags&linux.CLONE_VFORK == linux.CLONE_VFORK,
+ Untraced: flags&linux.CLONE_UNTRACED == linux.CLONE_UNTRACED,
+ InheritTracer: flags&linux.CLONE_PTRACE == linux.CLONE_PTRACE,
}
ntid, ctrl, err := t.Clone(&opts)
return uintptr(ntid), ctrl, err
@@ -168,7 +168,7 @@ func Clone(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
func Fork(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
// "A call to fork() is equivalent to a call to clone(2) specifying flags
// as just SIGCHLD." - fork(2)
- return clone(t, int(syscall.SIGCHLD), 0, 0, 0, 0)
+ return clone(t, int(linux.SIGCHLD), 0, 0, 0, 0)
}
// Vfork implements Linux syscall vfork(2).
@@ -178,7 +178,7 @@ func Vfork(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
//
// CLONE_VM | CLONE_VFORK | SIGCHLD
// """ - vfork(2)
- return clone(t, syscall.CLONE_VM|syscall.CLONE_VFORK|int(syscall.SIGCHLD), 0, 0, 0, 0)
+ return clone(t, linux.CLONE_VM|linux.CLONE_VFORK|int(linux.SIGCHLD), 0, 0, 0, 0)
}
// parseCommonWaitOptions applies the options common to wait4 and waitid to
@@ -193,7 +193,7 @@ func parseCommonWaitOptions(wopts *kernel.WaitOptions, options int) error {
wopts.NonCloneTasks = true
wopts.CloneTasks = true
default:
- return syscall.EINVAL
+ return syserror.EINVAL
}
if options&linux.WCONTINUED != 0 {
wopts.Events |= kernel.EventGroupContinue
@@ -210,7 +210,7 @@ func parseCommonWaitOptions(wopts *kernel.WaitOptions, options int) error {
// wait4 waits for the given child process to exit.
func wait4(t *kernel.Task, pid int, statusAddr usermem.Addr, options int, rusageAddr usermem.Addr) (uintptr, error) {
if options&^(linux.WNOHANG|linux.WUNTRACED|linux.WCONTINUED|linux.WNOTHREAD|linux.WALL|linux.WCLONE) != 0 {
- return 0, syscall.EINVAL
+ return 0, syserror.EINVAL
}
wopts := kernel.WaitOptions{
Events: kernel.EventExit | kernel.EventTraceeStop,
@@ -291,10 +291,10 @@ func Waitid(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
rusageAddr := args[4].Pointer()
if options&^(linux.WNOHANG|linux.WEXITED|linux.WSTOPPED|linux.WCONTINUED|linux.WNOWAIT|linux.WNOTHREAD|linux.WALL|linux.WCLONE) != 0 {
- return 0, nil, syscall.EINVAL
+ return 0, nil, syserror.EINVAL
}
if options&(linux.WEXITED|linux.WSTOPPED|linux.WCONTINUED) == 0 {
- return 0, nil, syscall.EINVAL
+ return 0, nil, syserror.EINVAL
}
wopts := kernel.WaitOptions{
Events: kernel.EventTraceeStop,
@@ -307,7 +307,7 @@ func Waitid(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
case linux.P_PGID:
wopts.SpecificPGID = kernel.ProcessGroupID(id)
default:
- return 0, nil, syscall.EINVAL
+ return 0, nil, syserror.EINVAL
}
if err := parseCommonWaitOptions(&wopts, options); err != nil {
@@ -347,12 +347,12 @@ func Waitid(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
return 0, nil, nil
}
si := arch.SignalInfo{
- Signo: int32(syscall.SIGCHLD),
+ Signo: int32(linux.SIGCHLD),
}
si.SetPid(int32(wr.TID))
si.SetUid(int32(wr.UID))
// TODO(b/73541790): convert kernel.ExitStatus to functions and make
- // WaitResult.Status a linux.WaitStatus
+ // WaitResult.Status a linux.WaitStatus.
s := syscall.WaitStatus(wr.Status)
switch {
case s.Exited():
@@ -374,7 +374,7 @@ func Waitid(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
}
case s.Continued():
si.Code = arch.CLD_CONTINUED
- si.SetStatus(int32(syscall.SIGCONT))
+ si.SetStatus(int32(linux.SIGCONT))
default:
t.Warningf("waitid got incomprehensible wait status %d", s)
}
@@ -395,16 +395,16 @@ func SetTidAddress(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel
func Unshare(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
flags := args[0].Int()
opts := kernel.SharingOptions{
- NewAddressSpace: flags&syscall.CLONE_VM == syscall.CLONE_VM,
- NewSignalHandlers: flags&syscall.CLONE_SIGHAND == syscall.CLONE_SIGHAND,
- NewThreadGroup: flags&syscall.CLONE_THREAD == syscall.CLONE_THREAD,
- NewPIDNamespace: flags&syscall.CLONE_NEWPID == syscall.CLONE_NEWPID,
- NewUserNamespace: flags&syscall.CLONE_NEWUSER == syscall.CLONE_NEWUSER,
- NewNetworkNamespace: flags&syscall.CLONE_NEWNET == syscall.CLONE_NEWNET,
- NewFiles: flags&syscall.CLONE_FILES == syscall.CLONE_FILES,
- NewFSContext: flags&syscall.CLONE_FS == syscall.CLONE_FS,
- NewUTSNamespace: flags&syscall.CLONE_NEWUTS == syscall.CLONE_NEWUTS,
- NewIPCNamespace: flags&syscall.CLONE_NEWIPC == syscall.CLONE_NEWIPC,
+ NewAddressSpace: flags&linux.CLONE_VM == linux.CLONE_VM,
+ NewSignalHandlers: flags&linux.CLONE_SIGHAND == linux.CLONE_SIGHAND,
+ NewThreadGroup: flags&linux.CLONE_THREAD == linux.CLONE_THREAD,
+ NewPIDNamespace: flags&linux.CLONE_NEWPID == linux.CLONE_NEWPID,
+ NewUserNamespace: flags&linux.CLONE_NEWUSER == linux.CLONE_NEWUSER,
+ NewNetworkNamespace: flags&linux.CLONE_NEWNET == linux.CLONE_NEWNET,
+ NewFiles: flags&linux.CLONE_FILES == linux.CLONE_FILES,
+ NewFSContext: flags&linux.CLONE_FS == linux.CLONE_FS,
+ NewUTSNamespace: flags&linux.CLONE_NEWUTS == linux.CLONE_NEWUTS,
+ NewIPCNamespace: flags&linux.CLONE_NEWIPC == linux.CLONE_NEWIPC,
}
// "CLONE_NEWPID automatically implies CLONE_THREAD as well." - unshare(2)
if opts.NewPIDNamespace {
@@ -623,7 +623,7 @@ func Getpriority(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S
who := kernel.ThreadID(args[1].Int())
switch which {
- case syscall.PRIO_PROCESS:
+ case linux.PRIO_PROCESS:
// Look for who, return ESRCH if not found.
var task *kernel.Task
if who == 0 {
@@ -633,7 +633,7 @@ func Getpriority(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S
}
if task == nil {
- return 0, nil, syscall.ESRCH
+ return 0, nil, syserror.ESRCH
}
// From kernel/sys.c:getpriority:
@@ -641,13 +641,13 @@ func Getpriority(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S
// will not return the normal nice-value, but a negated
// value that has been offset by 20"
return uintptr(20 - task.Niceness()), nil, nil
- case syscall.PRIO_USER:
+ case linux.PRIO_USER:
fallthrough
- case syscall.PRIO_PGRP:
+ case linux.PRIO_PGRP:
// PRIO_USER and PRIO_PGRP have no further implementation yet.
return 0, nil, nil
default:
- return 0, nil, syscall.EINVAL
+ return 0, nil, syserror.EINVAL
}
}
@@ -669,7 +669,7 @@ func Setpriority(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S
}
switch which {
- case syscall.PRIO_PROCESS:
+ case linux.PRIO_PROCESS:
// Look for who, return ESRCH if not found.
var task *kernel.Task
if who == 0 {
@@ -679,17 +679,17 @@ func Setpriority(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S
}
if task == nil {
- return 0, nil, syscall.ESRCH
+ return 0, nil, syserror.ESRCH
}
task.SetNiceness(niceval)
- case syscall.PRIO_USER:
+ case linux.PRIO_USER:
fallthrough
- case syscall.PRIO_PGRP:
+ case linux.PRIO_PGRP:
// PRIO_USER and PRIO_PGRP have no further implementation yet.
return 0, nil, nil
default:
- return 0, nil, syscall.EINVAL
+ return 0, nil, syserror.EINVAL
}
return 0, nil, nil
diff --git a/pkg/sentry/syscalls/linux/sys_time.go b/pkg/sentry/syscalls/linux/sys_time.go
index b4f2609c0..fe8725191 100644
--- a/pkg/sentry/syscalls/linux/sys_time.go
+++ b/pkg/sentry/syscalls/linux/sys_time.go
@@ -17,12 +17,12 @@ package linux
import (
"time"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// The most significant 29 bits hold either a pid or a file descriptor.
diff --git a/pkg/sentry/syscalls/linux/sys_timer.go b/pkg/sentry/syscalls/linux/sys_timer.go
index 04ea7a4e9..d4134207b 100644
--- a/pkg/sentry/syscalls/linux/sys_timer.go
+++ b/pkg/sentry/syscalls/linux/sys_timer.go
@@ -15,13 +15,13 @@
package linux
import (
- "syscall"
"time"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
const nsecPerSec = int64(time.Second)
@@ -47,7 +47,7 @@ func copyItimerValIn(t *kernel.Task, addr usermem.Addr) (linux.ItimerVal, error)
return itv, nil
default:
- return linux.ItimerVal{}, syscall.ENOSYS
+ return linux.ItimerVal{}, syserror.ENOSYS
}
}
@@ -65,7 +65,7 @@ func copyItimerValOut(t *kernel.Task, addr usermem.Addr, itv *linux.ItimerVal) e
_, err := t.CopyOut(addr, itv)
return err
default:
- return syscall.ENOSYS
+ return syserror.ENOSYS
}
}
diff --git a/pkg/sentry/syscalls/linux/sys_timerfd.go b/pkg/sentry/syscalls/linux/sys_timerfd.go
index ec0155cbb..1ce5ce4c3 100644
--- a/pkg/sentry/syscalls/linux/sys_timerfd.go
+++ b/pkg/sentry/syscalls/linux/sys_timerfd.go
@@ -15,14 +15,13 @@
package linux
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/timerfd"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/timerfd"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// TimerfdCreate implements Linux syscall timerfd_create(2).
@@ -49,9 +48,9 @@ func TimerfdCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel
NonBlocking: flags&linux.TFD_NONBLOCK != 0,
})
- fd, err := t.FDMap().NewFDFrom(0, f, kernel.FDFlags{
+ fd, err := t.NewFDFrom(0, f, kernel.FDFlags{
CloseOnExec: flags&linux.TFD_CLOEXEC != 0,
- }, t.ThreadGroup().Limits())
+ })
if err != nil {
return 0, nil, err
}
@@ -61,7 +60,7 @@ func TimerfdCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel
// TimerfdSettime implements Linux syscall timerfd_settime(2).
func TimerfdSettime(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
flags := args[1].Int()
newValAddr := args[2].Pointer()
oldValAddr := args[3].Pointer()
@@ -70,7 +69,7 @@ func TimerfdSettime(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kerne
return 0, nil, syserror.EINVAL
}
- f := t.FDMap().GetFile(fd)
+ f := t.GetFile(fd)
if f == nil {
return 0, nil, syserror.EBADF
}
@@ -101,10 +100,10 @@ func TimerfdSettime(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kerne
// TimerfdGettime implements Linux syscall timerfd_gettime(2).
func TimerfdGettime(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
curValAddr := args[1].Pointer()
- f := t.FDMap().GetFile(fd)
+ f := t.GetFile(fd)
if f == nil {
return 0, nil, syserror.EBADF
}
diff --git a/pkg/sentry/syscalls/linux/sys_tls.go b/pkg/sentry/syscalls/linux/sys_tls.go
index 1e8312e00..b3eb96a1c 100644
--- a/pkg/sentry/syscalls/linux/sys_tls.go
+++ b/pkg/sentry/syscalls/linux/sys_tls.go
@@ -17,11 +17,10 @@
package linux
import (
- "syscall"
-
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// ArchPrctl implements linux syscall arch_prctl(2).
@@ -39,14 +38,14 @@ func ArchPrctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
case linux.ARCH_SET_FS:
fsbase := args[1].Uint64()
if !t.Arch().SetTLS(uintptr(fsbase)) {
- return 0, nil, syscall.EPERM
+ return 0, nil, syserror.EPERM
}
case linux.ARCH_GET_GS, linux.ARCH_SET_GS:
t.Kernel().EmitUnimplementedEvent(t)
fallthrough
default:
- return 0, nil, syscall.EINVAL
+ return 0, nil, syserror.EINVAL
}
return 0, nil, nil
diff --git a/pkg/sentry/syscalls/linux/sys_utsname.go b/pkg/sentry/syscalls/linux/sys_utsname.go
index fa81fe10e..271ace08e 100644
--- a/pkg/sentry/syscalls/linux/sys_utsname.go
+++ b/pkg/sentry/syscalls/linux/sys_utsname.go
@@ -17,10 +17,10 @@
package linux
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// Uname implements linux syscall uname.
diff --git a/pkg/sentry/syscalls/linux/sys_write.go b/pkg/sentry/syscalls/linux/sys_write.go
index 1da72d606..5278c96a6 100644
--- a/pkg/sentry/syscalls/linux/sys_write.go
+++ b/pkg/sentry/syscalls/linux/sys_write.go
@@ -17,16 +17,15 @@ package linux
import (
"time"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/socket"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/waiter"
)
const (
@@ -39,11 +38,11 @@ const (
// Write implements linux syscall write(2).
func Write(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
addr := args[1].Pointer()
size := args[2].SizeT()
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
@@ -75,12 +74,12 @@ func Write(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
// Pwrite64 implements linux syscall pwrite64(2).
func Pwrite64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
addr := args[1].Pointer()
size := args[2].SizeT()
offset := args[3].Int64()
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
@@ -122,11 +121,11 @@ func Pwrite64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
// Writev implements linux syscall writev(2).
func Writev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
addr := args[1].Pointer()
iovcnt := int(args[2].Int())
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
@@ -152,12 +151,12 @@ func Writev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
// Pwritev implements linux syscall pwritev(2).
func Pwritev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
addr := args[1].Pointer()
iovcnt := int(args[2].Int())
offset := args[3].Int64()
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
@@ -202,7 +201,7 @@ func Pwritev2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
// splits the offset argument into a high/low value for compatibility with
// 32-bit architectures. The flags argument is the 5th argument.
- fd := kdefs.FD(args[0].Int())
+ fd := args[0].Int()
addr := args[1].Pointer()
iovcnt := int(args[2].Int())
offset := args[3].Int64()
@@ -212,7 +211,7 @@ func Pwritev2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
return 0, nil, syserror.EACCES
}
- file := t.FDMap().GetFile(fd)
+ file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
diff --git a/pkg/sentry/syscalls/linux/timespec.go b/pkg/sentry/syscalls/linux/timespec.go
index fa6fcdc0b..4ff8f9234 100644
--- a/pkg/sentry/syscalls/linux/timespec.go
+++ b/pkg/sentry/syscalls/linux/timespec.go
@@ -15,13 +15,12 @@
package linux
import (
- "syscall"
"time"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// copyTimespecIn copies a Timespec from the untrusted app range to the kernel.
@@ -70,7 +69,7 @@ func copyTimevalIn(t *kernel.Task, addr usermem.Addr) (linux.Timeval, error) {
tv.Usec = int64(usermem.ByteOrder.Uint64(in[8:]))
return tv, nil
default:
- return linux.Timeval{}, syscall.ENOSYS
+ return linux.Timeval{}, syserror.ENOSYS
}
}
@@ -84,7 +83,7 @@ func copyTimevalOut(t *kernel.Task, addr usermem.Addr, tv *linux.Timeval) error
_, err := t.CopyOutBytes(addr, out)
return err
default:
- return syscall.ENOSYS
+ return syserror.ENOSYS
}
}
@@ -104,7 +103,7 @@ func copyTimespecInToDuration(t *kernel.Task, timespecAddr usermem.Addr) (time.D
return 0, err
}
if !timespec.Valid() {
- return 0, syscall.EINVAL
+ return 0, syserror.EINVAL
}
timeout = time.Duration(timespec.ToNsecCapped())
}
diff --git a/pkg/sentry/syscalls/syscalls.go b/pkg/sentry/syscalls/syscalls.go
index 48c114232..f88055676 100644
--- a/pkg/sentry/syscalls/syscalls.go
+++ b/pkg/sentry/syscalls/syscalls.go
@@ -26,12 +26,11 @@ package syscalls
import (
"fmt"
- "syscall"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// Supported returns a syscall that is fully supported.
@@ -40,16 +39,7 @@ func Supported(name string, fn kernel.SyscallFn) kernel.Syscall {
Name: name,
Fn: fn,
SupportLevel: kernel.SupportFull,
- Note: "Full Support",
- }
-}
-
-// Undocumented returns a syscall that is undocumented.
-func Undocumented(name string, fn kernel.SyscallFn) kernel.Syscall {
- return kernel.Syscall{
- Name: name,
- Fn: fn,
- SupportLevel: kernel.SupportUndocumented,
+ Note: "Fully Supported.",
}
}
@@ -65,7 +55,7 @@ func PartiallySupported(name string, fn kernel.SyscallFn, note string, urls []st
}
// Error returns a syscall handler that will always give the passed error.
-func Error(name string, err syscall.Errno, note string, urls []string) kernel.Syscall {
+func Error(name string, err error, note string, urls []string) kernel.Syscall {
if note != "" {
note = note + "; "
}
@@ -75,14 +65,14 @@ func Error(name string, err syscall.Errno, note string, urls []string) kernel.Sy
return 0, nil, err
},
SupportLevel: kernel.SupportUnimplemented,
- Note: fmt.Sprintf("%sReturns %q", note, err.Error()),
+ Note: fmt.Sprintf("%sReturns %q.", note, err.Error()),
URLs: urls,
}
}
// ErrorWithEvent gives a syscall function that sends an unimplemented
// syscall event via the event channel and returns the passed error.
-func ErrorWithEvent(name string, err syscall.Errno, note string, urls []string) kernel.Syscall {
+func ErrorWithEvent(name string, err error, note string, urls []string) kernel.Syscall {
if note != "" {
note = note + "; "
}
@@ -93,7 +83,7 @@ func ErrorWithEvent(name string, err syscall.Errno, note string, urls []string)
return 0, nil, err
},
SupportLevel: kernel.SupportUnimplemented,
- Note: fmt.Sprintf("%sReturns %q", note, err.Error()),
+ Note: fmt.Sprintf("%sReturns %q.", note, err.Error()),
URLs: urls,
}
}
@@ -115,7 +105,7 @@ func CapError(name string, c linux.Capability, note string, urls []string) kerne
return 0, nil, syserror.ENOSYS
},
SupportLevel: kernel.SupportUnimplemented,
- Note: fmt.Sprintf("%sReturns %q if the process does not have %s; %q otherwise", note, syserror.EPERM, c.String(), syserror.ENOSYS),
+ Note: fmt.Sprintf("%sReturns %q if the process does not have %s; %q otherwise.", note, syserror.EPERM, c.String(), syserror.ENOSYS),
URLs: urls,
}
}
diff --git a/pkg/sentry/time/BUILD b/pkg/sentry/time/BUILD
index b50579a92..8aa6a3017 100644
--- a/pkg/sentry/time/BUILD
+++ b/pkg/sentry/time/BUILD
@@ -6,7 +6,7 @@ load("//tools/go_generics:defs.bzl", "go_template_instance")
go_template_instance(
name = "seqatomic_parameters",
- out = "seqatomic_parameters.go",
+ out = "seqatomic_parameters_unsafe.go",
package = "time",
suffix = "Parameters",
template = "//third_party/gvsync:generic_seqatomic",
@@ -27,11 +27,11 @@ go_library(
"parameters.go",
"sampler.go",
"sampler_unsafe.go",
- "seqatomic_parameters.go",
+ "seqatomic_parameters_unsafe.go",
"tsc_amd64.s",
"tsc_arm64.s",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/time",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/time",
visibility = ["//:sandbox"],
deps = [
"//pkg/log",
diff --git a/pkg/sentry/time/calibrated_clock.go b/pkg/sentry/time/calibrated_clock.go
index c27e391c9..318503277 100644
--- a/pkg/sentry/time/calibrated_clock.go
+++ b/pkg/sentry/time/calibrated_clock.go
@@ -20,9 +20,9 @@ import (
"sync"
"time"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/metric"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/metric"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// fallbackMetric tracks failed updates. It is not sync, as it is not critical
diff --git a/pkg/sentry/time/parameters.go b/pkg/sentry/time/parameters.go
index 63cf7c4a3..65868cb26 100644
--- a/pkg/sentry/time/parameters.go
+++ b/pkg/sentry/time/parameters.go
@@ -18,7 +18,7 @@ import (
"fmt"
"time"
- "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/log"
)
const (
diff --git a/pkg/sentry/time/sampler.go b/pkg/sentry/time/sampler.go
index 2140a99b7..4ac9c4474 100644
--- a/pkg/sentry/time/sampler.go
+++ b/pkg/sentry/time/sampler.go
@@ -17,7 +17,7 @@ package time
import (
"errors"
- "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/log"
)
const (
diff --git a/pkg/sentry/unimpl/BUILD b/pkg/sentry/unimpl/BUILD
index b608867a9..b69603da3 100644
--- a/pkg/sentry/unimpl/BUILD
+++ b/pkg/sentry/unimpl/BUILD
@@ -12,7 +12,7 @@ proto_library(
go_proto_library(
name = "unimplemented_syscall_go_proto",
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/unimpl/unimplemented_syscall_go_proto",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/unimpl/unimplemented_syscall_go_proto",
proto = ":unimplemented_syscall_proto",
visibility = ["//visibility:public"],
deps = ["//pkg/sentry/arch:registers_go_proto"],
@@ -21,7 +21,7 @@ go_proto_library(
go_library(
name = "unimpl",
srcs = ["events.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/unimpl",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/unimpl",
visibility = ["//:sandbox"],
deps = [
"//pkg/log",
diff --git a/pkg/sentry/unimpl/events.go b/pkg/sentry/unimpl/events.go
index d92766e2d..79b5de9e4 100644
--- a/pkg/sentry/unimpl/events.go
+++ b/pkg/sentry/unimpl/events.go
@@ -17,8 +17,8 @@
package unimpl
import (
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/context"
)
// contextID is the events package's type for context.Context.Value keys.
diff --git a/pkg/sentry/uniqueid/BUILD b/pkg/sentry/uniqueid/BUILD
index ccc5a28d3..86a87edd4 100644
--- a/pkg/sentry/uniqueid/BUILD
+++ b/pkg/sentry/uniqueid/BUILD
@@ -5,7 +5,7 @@ package(licenses = ["notice"])
go_library(
name = "uniqueid",
srcs = ["context.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/uniqueid",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/uniqueid",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/sentry/context",
diff --git a/pkg/sentry/uniqueid/context.go b/pkg/sentry/uniqueid/context.go
index e55b89689..4e466d66d 100644
--- a/pkg/sentry/uniqueid/context.go
+++ b/pkg/sentry/uniqueid/context.go
@@ -17,8 +17,8 @@
package uniqueid
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
)
// contextID is the kernel package's type for context.Context.Value keys.
diff --git a/pkg/sentry/usage/BUILD b/pkg/sentry/usage/BUILD
index 860733061..a34c39540 100644
--- a/pkg/sentry/usage/BUILD
+++ b/pkg/sentry/usage/BUILD
@@ -11,7 +11,7 @@ go_library(
"memory_unsafe.go",
"usage.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/usage",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/usage",
visibility = [
"//pkg/sentry:internal",
],
diff --git a/pkg/sentry/usage/memory.go b/pkg/sentry/usage/memory.go
index 9ed974ccb..f4326706a 100644
--- a/pkg/sentry/usage/memory.go
+++ b/pkg/sentry/usage/memory.go
@@ -21,8 +21,8 @@ import (
"sync/atomic"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/bits"
- "gvisor.googlesource.com/gvisor/pkg/memutil"
+ "gvisor.dev/gvisor/pkg/bits"
+ "gvisor.dev/gvisor/pkg/memutil"
)
// MemoryKind represents a type of memory used by the application.
diff --git a/pkg/sentry/usermem/BUILD b/pkg/sentry/usermem/BUILD
index e38b31b08..a5b4206bb 100644
--- a/pkg/sentry/usermem/BUILD
+++ b/pkg/sentry/usermem/BUILD
@@ -28,7 +28,7 @@ go_library(
"usermem_unsafe.go",
"usermem_x86.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/usermem",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/usermem",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/atomicbitops",
diff --git a/pkg/sentry/usermem/bytes_io.go b/pkg/sentry/usermem/bytes_io.go
index f98d82168..8d88396ba 100644
--- a/pkg/sentry/usermem/bytes_io.go
+++ b/pkg/sentry/usermem/bytes_io.go
@@ -15,9 +15,9 @@
package usermem
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/safemem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
const maxInt = int(^uint(0) >> 1)
diff --git a/pkg/sentry/usermem/bytes_io_unsafe.go b/pkg/sentry/usermem/bytes_io_unsafe.go
index bb49d2ff3..fca5952f4 100644
--- a/pkg/sentry/usermem/bytes_io_unsafe.go
+++ b/pkg/sentry/usermem/bytes_io_unsafe.go
@@ -18,8 +18,8 @@ import (
"sync/atomic"
"unsafe"
- "gvisor.googlesource.com/gvisor/pkg/atomicbitops"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/atomicbitops"
+ "gvisor.dev/gvisor/pkg/sentry/context"
)
// SwapUint32 implements IO.SwapUint32.
diff --git a/pkg/sentry/usermem/usermem.go b/pkg/sentry/usermem/usermem.go
index 9dde327a2..6eced660a 100644
--- a/pkg/sentry/usermem/usermem.go
+++ b/pkg/sentry/usermem/usermem.go
@@ -20,10 +20,10 @@ import (
"io"
"strconv"
- "gvisor.googlesource.com/gvisor/pkg/binary"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/safemem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// IO provides access to the contents of a virtual memory space.
diff --git a/pkg/sentry/usermem/usermem_test.go b/pkg/sentry/usermem/usermem_test.go
index 575e5039d..299f64754 100644
--- a/pkg/sentry/usermem/usermem_test.go
+++ b/pkg/sentry/usermem/usermem_test.go
@@ -22,9 +22,9 @@ import (
"strings"
"testing"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/safemem"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// newContext returns a context.Context that we can use in these tests (we
diff --git a/pkg/sentry/watchdog/BUILD b/pkg/sentry/watchdog/BUILD
index 0bbf3705c..4d8435265 100644
--- a/pkg/sentry/watchdog/BUILD
+++ b/pkg/sentry/watchdog/BUILD
@@ -5,7 +5,7 @@ package(licenses = ["notice"])
go_library(
name = "watchdog",
srcs = ["watchdog.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/watchdog",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/watchdog",
visibility = ["//:sandbox"],
deps = [
"//pkg/abi/linux",
diff --git a/pkg/sentry/watchdog/watchdog.go b/pkg/sentry/watchdog/watchdog.go
index 2fc4472dd..145102c0d 100644
--- a/pkg/sentry/watchdog/watchdog.go
+++ b/pkg/sentry/watchdog/watchdog.go
@@ -35,11 +35,11 @@ import (
"sync"
"time"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/metric"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/metric"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
)
// DefaultTimeout is a resonable timeout value for most applications.
@@ -271,23 +271,23 @@ func (w *Watchdog) reportStuckWatchdog() {
w.onStuckTask(true, &buf)
}
-func (w *Watchdog) onStuckTask(newTaskFound bool, buf *bytes.Buffer) {
+func (w *Watchdog) onStuckTask(newTaskFound bool, msg *bytes.Buffer) {
switch w.timeoutAction {
case LogWarning:
// Dump stack only if a new task is detected or if it sometime has passed since
// the last time a stack dump was generated.
if !newTaskFound && time.Since(w.lastStackDump) < stackDumpSameTaskPeriod {
- buf.WriteString("\n...[stack dump skipped]...")
- log.Warningf(buf.String())
+ msg.WriteString("\n...[stack dump skipped]...")
+ log.Warningf(msg.String())
} else {
- log.TracebackAll(buf.String())
+ log.TracebackAll(msg.String())
w.lastStackDump = time.Now()
}
case Panic:
// Panic will skip over running tasks, which is likely the culprit here. So manually
// dump all stacks before panic'ing.
- log.TracebackAll(buf.String())
+ log.TracebackAll(msg.String())
// Attempt to flush metrics, timeout and move on in case metrics are stuck as well.
metricsEmitted := make(chan struct{}, 1)
@@ -300,6 +300,6 @@ func (w *Watchdog) onStuckTask(newTaskFound bool, buf *bytes.Buffer) {
case <-metricsEmitted:
case <-time.After(1 * time.Second):
}
- panic("Sentry detected stuck task(s). See stack trace and message above for more details")
+ panic(fmt.Sprintf("Stack for running G's are skipped while panicking.\n%s", msg.String()))
}
}
diff --git a/pkg/sleep/BUILD b/pkg/sleep/BUILD
index 2b005bf66..00665c939 100644
--- a/pkg/sleep/BUILD
+++ b/pkg/sleep/BUILD
@@ -10,7 +10,7 @@ go_library(
"commit_noasm.go",
"sleep_unsafe.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/sleep",
+ importpath = "gvisor.dev/gvisor/pkg/sleep",
visibility = ["//:sandbox"],
)
diff --git a/pkg/state/BUILD b/pkg/state/BUILD
index 0a975e162..c0f3c658d 100644
--- a/pkg/state/BUILD
+++ b/pkg/state/BUILD
@@ -49,7 +49,7 @@ go_library(
"state.go",
"stats.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/state",
+ importpath = "gvisor.dev/gvisor/pkg/state",
visibility = ["//:sandbox"],
deps = [
":object_go_proto",
@@ -65,7 +65,7 @@ proto_library(
go_proto_library(
name = "object_go_proto",
- importpath = "gvisor.googlesource.com/gvisor/pkg/state/object_go_proto",
+ importpath = "gvisor.dev/gvisor/pkg/state/object_go_proto",
proto = ":object_proto",
visibility = ["//:sandbox"],
)
diff --git a/pkg/state/decode.go b/pkg/state/decode.go
index 73a59f871..47e6b878a 100644
--- a/pkg/state/decode.go
+++ b/pkg/state/decode.go
@@ -24,7 +24,7 @@ import (
"sort"
"github.com/golang/protobuf/proto"
- pb "gvisor.googlesource.com/gvisor/pkg/state/object_go_proto"
+ pb "gvisor.dev/gvisor/pkg/state/object_go_proto"
)
// objectState represents an object that may be in the process of being
diff --git a/pkg/state/encode.go b/pkg/state/encode.go
index b0714170b..5d9409a45 100644
--- a/pkg/state/encode.go
+++ b/pkg/state/encode.go
@@ -23,7 +23,7 @@ import (
"sort"
"github.com/golang/protobuf/proto"
- pb "gvisor.googlesource.com/gvisor/pkg/state/object_go_proto"
+ pb "gvisor.dev/gvisor/pkg/state/object_go_proto"
)
// queuedObject is an object queued for encoding.
@@ -84,7 +84,7 @@ type encodeState struct {
// register looks up an ID, registering if necessary.
//
-// If the object was not previosly registered, it is enqueued to be serialized.
+// If the object was not previously registered, it is enqueued to be serialized.
// See the documentation for idsByObject for more information.
func (es *encodeState) register(obj reflect.Value) uint64 {
// It is not legal to call register for any non-pointer objects (see
diff --git a/pkg/state/map.go b/pkg/state/map.go
index 1fb9b47b8..7e6fefed4 100644
--- a/pkg/state/map.go
+++ b/pkg/state/map.go
@@ -20,7 +20,7 @@ import (
"sort"
"sync"
- pb "gvisor.googlesource.com/gvisor/pkg/state/object_go_proto"
+ pb "gvisor.dev/gvisor/pkg/state/object_go_proto"
)
// entry is a single map entry.
diff --git a/pkg/state/printer.go b/pkg/state/printer.go
index 5174c3ba3..3ce18242f 100644
--- a/pkg/state/printer.go
+++ b/pkg/state/printer.go
@@ -22,7 +22,7 @@ import (
"strings"
"github.com/golang/protobuf/proto"
- pb "gvisor.googlesource.com/gvisor/pkg/state/object_go_proto"
+ pb "gvisor.dev/gvisor/pkg/state/object_go_proto"
)
// format formats a single object, for pretty-printing. It also returns whether
diff --git a/pkg/state/state.go b/pkg/state/state.go
index cf7df803a..d408ff84a 100644
--- a/pkg/state/state.go
+++ b/pkg/state/state.go
@@ -55,7 +55,7 @@ import (
"reflect"
"runtime"
- pb "gvisor.googlesource.com/gvisor/pkg/state/object_go_proto"
+ pb "gvisor.dev/gvisor/pkg/state/object_go_proto"
)
// ErrState is returned when an error is encountered during encode/decode.
diff --git a/pkg/state/statefile/BUILD b/pkg/state/statefile/BUILD
index 5967781e8..e70f4a79f 100644
--- a/pkg/state/statefile/BUILD
+++ b/pkg/state/statefile/BUILD
@@ -5,7 +5,7 @@ package(licenses = ["notice"])
go_library(
name = "statefile",
srcs = ["statefile.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/state/statefile",
+ importpath = "gvisor.dev/gvisor/pkg/state/statefile",
visibility = ["//:sandbox"],
deps = [
"//pkg/binary",
diff --git a/pkg/state/statefile/statefile.go b/pkg/state/statefile/statefile.go
index ad4e3b43e..c0f4c4954 100644
--- a/pkg/state/statefile/statefile.go
+++ b/pkg/state/statefile/statefile.go
@@ -55,8 +55,8 @@ import (
"strings"
"time"
- "gvisor.googlesource.com/gvisor/pkg/binary"
- "gvisor.googlesource.com/gvisor/pkg/compressio"
+ "gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/compressio"
)
// keySize is the AES-256 key length.
diff --git a/pkg/state/statefile/statefile_test.go b/pkg/state/statefile/statefile_test.go
index 60b769895..0b470fdec 100644
--- a/pkg/state/statefile/statefile_test.go
+++ b/pkg/state/statefile/statefile_test.go
@@ -24,7 +24,7 @@ import (
"testing"
"time"
- "gvisor.googlesource.com/gvisor/pkg/compressio"
+ "gvisor.dev/gvisor/pkg/compressio"
)
func randomKey() ([]byte, error) {
diff --git a/pkg/syserr/BUILD b/pkg/syserr/BUILD
index 0d65115ef..5665ad4ee 100644
--- a/pkg/syserr/BUILD
+++ b/pkg/syserr/BUILD
@@ -9,7 +9,7 @@ go_library(
"netstack.go",
"syserr.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/syserr",
+ importpath = "gvisor.dev/gvisor/pkg/syserr",
visibility = ["//visibility:public"],
deps = [
"//pkg/abi/linux",
diff --git a/pkg/syserr/netstack.go b/pkg/syserr/netstack.go
index bd489b424..8ff922c69 100644
--- a/pkg/syserr/netstack.go
+++ b/pkg/syserr/netstack.go
@@ -15,8 +15,8 @@
package syserr
import (
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/tcpip"
)
// Mapping for tcpip.Error types.
@@ -49,43 +49,44 @@ var (
)
var netstackErrorTranslations = map[*tcpip.Error]*Error{
- tcpip.ErrUnknownProtocol: ErrUnknownProtocol,
- tcpip.ErrUnknownNICID: ErrUnknownNICID,
- tcpip.ErrUnknownDevice: ErrUnknownDevice,
- tcpip.ErrUnknownProtocolOption: ErrUnknownProtocolOption,
- tcpip.ErrDuplicateNICID: ErrDuplicateNICID,
- tcpip.ErrDuplicateAddress: ErrDuplicateAddress,
- tcpip.ErrNoRoute: ErrNoRoute,
- tcpip.ErrBadLinkEndpoint: ErrBadLinkEndpoint,
- tcpip.ErrAlreadyBound: ErrAlreadyBound,
- tcpip.ErrInvalidEndpointState: ErrInvalidEndpointState,
- tcpip.ErrAlreadyConnecting: ErrAlreadyConnecting,
- tcpip.ErrAlreadyConnected: ErrAlreadyConnected,
- tcpip.ErrNoPortAvailable: ErrNoPortAvailable,
- tcpip.ErrPortInUse: ErrPortInUse,
- tcpip.ErrBadLocalAddress: ErrBadLocalAddress,
- tcpip.ErrClosedForSend: ErrClosedForSend,
- tcpip.ErrClosedForReceive: ErrClosedForReceive,
- tcpip.ErrWouldBlock: ErrWouldBlock,
- tcpip.ErrConnectionRefused: ErrConnectionRefused,
- tcpip.ErrTimeout: ErrTimeout,
- tcpip.ErrAborted: ErrAborted,
- tcpip.ErrConnectStarted: ErrConnectStarted,
- tcpip.ErrDestinationRequired: ErrDestinationRequired,
- tcpip.ErrNotSupported: ErrNotSupported,
- tcpip.ErrQueueSizeNotSupported: ErrQueueSizeNotSupported,
- tcpip.ErrNotConnected: ErrNotConnected,
- tcpip.ErrConnectionReset: ErrConnectionReset,
- tcpip.ErrConnectionAborted: ErrConnectionAborted,
- tcpip.ErrNoSuchFile: ErrNoSuchFile,
- tcpip.ErrInvalidOptionValue: ErrInvalidOptionValue,
- tcpip.ErrNoLinkAddress: ErrHostDown,
- tcpip.ErrBadAddress: ErrBadAddress,
- tcpip.ErrNetworkUnreachable: ErrNetworkUnreachable,
- tcpip.ErrMessageTooLong: ErrMessageTooLong,
- tcpip.ErrNoBufferSpace: ErrNoBufferSpace,
- tcpip.ErrBroadcastDisabled: ErrBroadcastDisabled,
- tcpip.ErrNotPermitted: ErrNotPermittedNet,
+ tcpip.ErrUnknownProtocol: ErrUnknownProtocol,
+ tcpip.ErrUnknownNICID: ErrUnknownNICID,
+ tcpip.ErrUnknownDevice: ErrUnknownDevice,
+ tcpip.ErrUnknownProtocolOption: ErrUnknownProtocolOption,
+ tcpip.ErrDuplicateNICID: ErrDuplicateNICID,
+ tcpip.ErrDuplicateAddress: ErrDuplicateAddress,
+ tcpip.ErrNoRoute: ErrNoRoute,
+ tcpip.ErrBadLinkEndpoint: ErrBadLinkEndpoint,
+ tcpip.ErrAlreadyBound: ErrAlreadyBound,
+ tcpip.ErrInvalidEndpointState: ErrInvalidEndpointState,
+ tcpip.ErrAlreadyConnecting: ErrAlreadyConnecting,
+ tcpip.ErrAlreadyConnected: ErrAlreadyConnected,
+ tcpip.ErrNoPortAvailable: ErrNoPortAvailable,
+ tcpip.ErrPortInUse: ErrPortInUse,
+ tcpip.ErrBadLocalAddress: ErrBadLocalAddress,
+ tcpip.ErrClosedForSend: ErrClosedForSend,
+ tcpip.ErrClosedForReceive: ErrClosedForReceive,
+ tcpip.ErrWouldBlock: ErrWouldBlock,
+ tcpip.ErrConnectionRefused: ErrConnectionRefused,
+ tcpip.ErrTimeout: ErrTimeout,
+ tcpip.ErrAborted: ErrAborted,
+ tcpip.ErrConnectStarted: ErrConnectStarted,
+ tcpip.ErrDestinationRequired: ErrDestinationRequired,
+ tcpip.ErrNotSupported: ErrNotSupported,
+ tcpip.ErrQueueSizeNotSupported: ErrQueueSizeNotSupported,
+ tcpip.ErrNotConnected: ErrNotConnected,
+ tcpip.ErrConnectionReset: ErrConnectionReset,
+ tcpip.ErrConnectionAborted: ErrConnectionAborted,
+ tcpip.ErrNoSuchFile: ErrNoSuchFile,
+ tcpip.ErrInvalidOptionValue: ErrInvalidOptionValue,
+ tcpip.ErrNoLinkAddress: ErrHostDown,
+ tcpip.ErrBadAddress: ErrBadAddress,
+ tcpip.ErrNetworkUnreachable: ErrNetworkUnreachable,
+ tcpip.ErrMessageTooLong: ErrMessageTooLong,
+ tcpip.ErrNoBufferSpace: ErrNoBufferSpace,
+ tcpip.ErrBroadcastDisabled: ErrBroadcastDisabled,
+ tcpip.ErrNotPermitted: ErrNotPermittedNet,
+ tcpip.ErrAddressFamilyNotSupported: ErrAddressFamilyNotSupported,
}
// TranslateNetstackError converts an error from the tcpip package to a sentry
diff --git a/pkg/syserr/syserr.go b/pkg/syserr/syserr.go
index 4ddbd3322..ac4b799c3 100644
--- a/pkg/syserr/syserr.go
+++ b/pkg/syserr/syserr.go
@@ -21,8 +21,8 @@ import (
"fmt"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/syserror"
)
// Error represents an internal error.
diff --git a/pkg/syserror/BUILD b/pkg/syserror/BUILD
index ac478d0ff..b149f9e02 100644
--- a/pkg/syserror/BUILD
+++ b/pkg/syserror/BUILD
@@ -5,7 +5,7 @@ package(licenses = ["notice"])
go_library(
name = "syserror",
srcs = ["syserror.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/syserror",
+ importpath = "gvisor.dev/gvisor/pkg/syserror",
visibility = ["//visibility:public"],
)
diff --git a/pkg/syserror/syserror.go b/pkg/syserror/syserror.go
index 345653544..1987e89cc 100644
--- a/pkg/syserror/syserror.go
+++ b/pkg/syserror/syserror.go
@@ -48,6 +48,7 @@ var (
EMSGSIZE = error(syscall.EMSGSIZE)
ENAMETOOLONG = error(syscall.ENAMETOOLONG)
ENOATTR = ENODATA
+ ENOBUFS = error(syscall.ENOBUFS)
ENODATA = error(syscall.ENODATA)
ENODEV = error(syscall.ENODEV)
ENOENT = error(syscall.ENOENT)
@@ -59,6 +60,7 @@ var (
ENOSYS = error(syscall.ENOSYS)
ENOTDIR = error(syscall.ENOTDIR)
ENOTEMPTY = error(syscall.ENOTEMPTY)
+ ENOTSOCK = error(syscall.ENOTSOCK)
ENOTSUP = error(syscall.ENOTSUP)
ENOTTY = error(syscall.ENOTTY)
ENXIO = error(syscall.ENXIO)
diff --git a/pkg/syserror/syserror_test.go b/pkg/syserror/syserror_test.go
index f2a10ee7b..29719752e 100644
--- a/pkg/syserror/syserror_test.go
+++ b/pkg/syserror/syserror_test.go
@@ -19,7 +19,7 @@ import (
"syscall"
"testing"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/syserror"
)
var globalError error
diff --git a/pkg/tcpip/BUILD b/pkg/tcpip/BUILD
index 83524cc8a..047f8329a 100644
--- a/pkg/tcpip/BUILD
+++ b/pkg/tcpip/BUILD
@@ -8,7 +8,7 @@ go_library(
"tcpip.go",
"time_unsafe.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip",
+ importpath = "gvisor.dev/gvisor/pkg/tcpip",
visibility = ["//visibility:public"],
deps = [
"//pkg/tcpip/buffer",
diff --git a/pkg/tcpip/adapters/gonet/BUILD b/pkg/tcpip/adapters/gonet/BUILD
index ee2417238..c40924852 100644
--- a/pkg/tcpip/adapters/gonet/BUILD
+++ b/pkg/tcpip/adapters/gonet/BUILD
@@ -5,7 +5,7 @@ package(licenses = ["notice"])
go_library(
name = "gonet",
srcs = ["gonet.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/adapters/gonet",
+ importpath = "gvisor.dev/gvisor/pkg/tcpip/adapters/gonet",
visibility = ["//visibility:public"],
deps = [
"//pkg/tcpip",
diff --git a/pkg/tcpip/adapters/gonet/gonet.go b/pkg/tcpip/adapters/gonet/gonet.go
index 2153222cf..308f620e5 100644
--- a/pkg/tcpip/adapters/gonet/gonet.go
+++ b/pkg/tcpip/adapters/gonet/gonet.go
@@ -23,12 +23,12 @@ import (
"sync"
"time"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/udp"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/tcp"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/udp"
+ "gvisor.dev/gvisor/pkg/waiter"
)
var (
diff --git a/pkg/tcpip/adapters/gonet/gonet_test.go b/pkg/tcpip/adapters/gonet/gonet_test.go
index 2552004a9..39efe44c7 100644
--- a/pkg/tcpip/adapters/gonet/gonet_test.go
+++ b/pkg/tcpip/adapters/gonet/gonet_test.go
@@ -25,14 +25,14 @@ import (
"time"
"golang.org/x/net/nettest"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/loopback"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv6"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/udp"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/link/loopback"
+ "gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
+ "gvisor.dev/gvisor/pkg/tcpip/network/ipv6"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/tcp"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/udp"
+ "gvisor.dev/gvisor/pkg/waiter"
)
const (
diff --git a/pkg/tcpip/buffer/BUILD b/pkg/tcpip/buffer/BUILD
index 648d12cdf..3301967fb 100644
--- a/pkg/tcpip/buffer/BUILD
+++ b/pkg/tcpip/buffer/BUILD
@@ -8,7 +8,7 @@ go_library(
"prependable.go",
"view.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer",
+ importpath = "gvisor.dev/gvisor/pkg/tcpip/buffer",
visibility = ["//visibility:public"],
)
diff --git a/pkg/tcpip/buffer/view.go b/pkg/tcpip/buffer/view.go
index 1a9d40778..150310c11 100644
--- a/pkg/tcpip/buffer/view.go
+++ b/pkg/tcpip/buffer/view.go
@@ -50,7 +50,7 @@ func (v View) ToVectorisedView() VectorisedView {
return NewVectorisedView(len(v), []View{v})
}
-// VectorisedView is a vectorised version of View using non contigous memory.
+// VectorisedView is a vectorised version of View using non contiguous memory.
// It supports all the convenience methods supported by View.
//
// +stateify savable
diff --git a/pkg/tcpip/checker/BUILD b/pkg/tcpip/checker/BUILD
index f597d0b24..4cecfb989 100644
--- a/pkg/tcpip/checker/BUILD
+++ b/pkg/tcpip/checker/BUILD
@@ -6,7 +6,7 @@ go_library(
name = "checker",
testonly = 1,
srcs = ["checker.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/checker",
+ importpath = "gvisor.dev/gvisor/pkg/tcpip/checker",
visibility = ["//visibility:public"],
deps = [
"//pkg/tcpip",
diff --git a/pkg/tcpip/checker/checker.go b/pkg/tcpip/checker/checker.go
index 6e7edf3ab..afcabd51d 100644
--- a/pkg/tcpip/checker/checker.go
+++ b/pkg/tcpip/checker/checker.go
@@ -21,9 +21,9 @@ import (
"reflect"
"testing"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/seqnum"
)
// NetworkChecker is a function to check a property of a network packet.
diff --git a/pkg/tcpip/hash/jenkins/BUILD b/pkg/tcpip/hash/jenkins/BUILD
index ce2194a4d..29b30be9c 100644
--- a/pkg/tcpip/hash/jenkins/BUILD
+++ b/pkg/tcpip/hash/jenkins/BUILD
@@ -5,7 +5,7 @@ package(licenses = ["notice"])
go_library(
name = "jenkins",
srcs = ["jenkins.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/hash/jenkins",
+ importpath = "gvisor.dev/gvisor/pkg/tcpip/hash/jenkins",
visibility = [
"//visibility:public",
],
diff --git a/pkg/tcpip/header/BUILD b/pkg/tcpip/header/BUILD
index a5c7290ee..76ef02f13 100644
--- a/pkg/tcpip/header/BUILD
+++ b/pkg/tcpip/header/BUILD
@@ -18,7 +18,7 @@ go_library(
"tcp.go",
"udp.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/header",
+ importpath = "gvisor.dev/gvisor/pkg/tcpip/header",
visibility = ["//visibility:public"],
deps = [
"//pkg/tcpip",
diff --git a/pkg/tcpip/header/arp.go b/pkg/tcpip/header/arp.go
index 55fe7292c..718a4720a 100644
--- a/pkg/tcpip/header/arp.go
+++ b/pkg/tcpip/header/arp.go
@@ -14,7 +14,7 @@
package header
-import "gvisor.googlesource.com/gvisor/pkg/tcpip"
+import "gvisor.dev/gvisor/pkg/tcpip"
const (
// ARPProtocolNumber is the ARP network protocol number.
diff --git a/pkg/tcpip/header/checksum.go b/pkg/tcpip/header/checksum.go
index 2eaa7938a..39a4d69be 100644
--- a/pkg/tcpip/header/checksum.go
+++ b/pkg/tcpip/header/checksum.go
@@ -19,8 +19,8 @@ package header
import (
"encoding/binary"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
)
func calculateChecksum(buf []byte, initial uint32) uint16 {
diff --git a/pkg/tcpip/header/eth.go b/pkg/tcpip/header/eth.go
index 76143f454..4c3d3311f 100644
--- a/pkg/tcpip/header/eth.go
+++ b/pkg/tcpip/header/eth.go
@@ -17,7 +17,7 @@ package header
import (
"encoding/binary"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip"
)
const (
diff --git a/pkg/tcpip/header/icmpv4.go b/pkg/tcpip/header/icmpv4.go
index 782e1053c..c081de61f 100644
--- a/pkg/tcpip/header/icmpv4.go
+++ b/pkg/tcpip/header/icmpv4.go
@@ -17,7 +17,7 @@ package header
import (
"encoding/binary"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip"
)
// ICMPv4 represents an ICMPv4 header stored in a byte array.
diff --git a/pkg/tcpip/header/icmpv6.go b/pkg/tcpip/header/icmpv6.go
index d0b10d849..3cc57e234 100644
--- a/pkg/tcpip/header/icmpv6.go
+++ b/pkg/tcpip/header/icmpv6.go
@@ -17,7 +17,7 @@ package header
import (
"encoding/binary"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip"
)
// ICMPv6 represents an ICMPv6 header stored in a byte array.
diff --git a/pkg/tcpip/header/interfaces.go b/pkg/tcpip/header/interfaces.go
index fb250ea30..861cbbb70 100644
--- a/pkg/tcpip/header/interfaces.go
+++ b/pkg/tcpip/header/interfaces.go
@@ -15,7 +15,7 @@
package header
import (
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip"
)
const (
diff --git a/pkg/tcpip/header/ipv4.go b/pkg/tcpip/header/ipv4.go
index 96e461491..7b8034de4 100644
--- a/pkg/tcpip/header/ipv4.go
+++ b/pkg/tcpip/header/ipv4.go
@@ -17,7 +17,7 @@ package header
import (
"encoding/binary"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip"
)
const (
@@ -85,6 +85,10 @@ const (
// units, the header cannot exceed 15*4 = 60 bytes.
IPv4MaximumHeaderSize = 60
+ // MinIPFragmentPayloadSize is the minimum number of payload bytes that
+ // the first fragment must carry when an IPv4 packet is fragmented.
+ MinIPFragmentPayloadSize = 8
+
// IPv4AddressSize is the size, in bytes, of an IPv4 address.
IPv4AddressSize = 4
diff --git a/pkg/tcpip/header/ipv6.go b/pkg/tcpip/header/ipv6.go
index 66820a466..7163eaa36 100644
--- a/pkg/tcpip/header/ipv6.go
+++ b/pkg/tcpip/header/ipv6.go
@@ -18,7 +18,7 @@ import (
"encoding/binary"
"strings"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip"
)
const (
diff --git a/pkg/tcpip/header/ipv6_fragment.go b/pkg/tcpip/header/ipv6_fragment.go
index 6d896355a..018555a26 100644
--- a/pkg/tcpip/header/ipv6_fragment.go
+++ b/pkg/tcpip/header/ipv6_fragment.go
@@ -17,7 +17,7 @@ package header
import (
"encoding/binary"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip"
)
const (
diff --git a/pkg/tcpip/header/ipversion_test.go b/pkg/tcpip/header/ipversion_test.go
index 0c830180e..b5540bf66 100644
--- a/pkg/tcpip/header/ipversion_test.go
+++ b/pkg/tcpip/header/ipversion_test.go
@@ -17,7 +17,7 @@ package header_test
import (
"testing"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
)
func TestIPv4(t *testing.T) {
diff --git a/pkg/tcpip/header/tcp.go b/pkg/tcpip/header/tcp.go
index 0cd89b992..82cfe785c 100644
--- a/pkg/tcpip/header/tcp.go
+++ b/pkg/tcpip/header/tcp.go
@@ -18,8 +18,8 @@ import (
"encoding/binary"
"github.com/google/btree"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/seqnum"
)
// These constants are the offsets of the respective fields in the TCP header.
@@ -176,6 +176,21 @@ const (
// TCPProtocolNumber is TCP's transport protocol number.
TCPProtocolNumber tcpip.TransportProtocolNumber = 6
+
+ // TCPMinimumMSS is the minimum acceptable value for MSS. This is the
+ // same as the value TCP_MIN_MSS defined net/tcp.h.
+ TCPMinimumMSS = IPv4MaximumHeaderSize + TCPHeaderMaximumSize + MinIPFragmentPayloadSize - IPv4MinimumSize - TCPMinimumSize
+
+ // TCPMaximumMSS is the maximum acceptable value for MSS.
+ TCPMaximumMSS = 0xffff
+
+ // TCPDefaultMSS is the MSS value that should be used if an MSS option
+ // is not received from the peer. It's also the value returned by
+ // TCP_MAXSEG option for a socket in an unconnected state.
+ //
+ // Per RFC 1122, page 85: "If an MSS option is not received at
+ // connection setup, TCP MUST assume a default send MSS of 536."
+ TCPDefaultMSS = 536
)
// SourcePort returns the "source port" field of the tcp header.
@@ -306,7 +321,7 @@ func ParseSynOptions(opts []byte, isAck bool) TCPSynOptions {
synOpts := TCPSynOptions{
// Per RFC 1122, page 85: "If an MSS option is not received at
// connection setup, TCP MUST assume a default send MSS of 536."
- MSS: 536,
+ MSS: TCPDefaultMSS,
// If no window scale option is specified, WS in options is
// returned as -1; this is because the absence of the option
// indicates that the we cannot use window scaling on the
diff --git a/pkg/tcpip/header/tcp_test.go b/pkg/tcpip/header/tcp_test.go
index 9a2b99489..72563837b 100644
--- a/pkg/tcpip/header/tcp_test.go
+++ b/pkg/tcpip/header/tcp_test.go
@@ -18,7 +18,7 @@ import (
"reflect"
"testing"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
)
func TestEncodeSACKBlocks(t *testing.T) {
diff --git a/pkg/tcpip/header/udp.go b/pkg/tcpip/header/udp.go
index 2205fec18..c1f454805 100644
--- a/pkg/tcpip/header/udp.go
+++ b/pkg/tcpip/header/udp.go
@@ -17,7 +17,7 @@ package header
import (
"encoding/binary"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip"
)
const (
diff --git a/pkg/tcpip/iptables/BUILD b/pkg/tcpip/iptables/BUILD
new file mode 100644
index 000000000..fc9abbb55
--- /dev/null
+++ b/pkg/tcpip/iptables/BUILD
@@ -0,0 +1,18 @@
+package(licenses = ["notice"])
+
+load("//tools/go_stateify:defs.bzl", "go_library")
+
+go_library(
+ name = "iptables",
+ srcs = [
+ "iptables.go",
+ "targets.go",
+ "types.go",
+ ],
+ importpath = "gvisor.dev/gvisor/pkg/tcpip/iptables",
+ visibility = ["//visibility:public"],
+ deps = [
+ "//pkg/tcpip",
+ "//pkg/tcpip/buffer",
+ ],
+)
diff --git a/pkg/tcpip/iptables/iptables.go b/pkg/tcpip/iptables/iptables.go
new file mode 100644
index 000000000..f1e1d1fad
--- /dev/null
+++ b/pkg/tcpip/iptables/iptables.go
@@ -0,0 +1,81 @@
+// Copyright 2019 The gVisor authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package iptables supports packet filtering and manipulation via the iptables
+// tool.
+package iptables
+
+const (
+ tablenameNat = "nat"
+ tablenameMangle = "mangle"
+)
+
+// Chain names as defined by net/ipv4/netfilter/ip_tables.c.
+const (
+ chainNamePrerouting = "PREROUTING"
+ chainNameInput = "INPUT"
+ chainNameForward = "FORWARD"
+ chainNameOutput = "OUTPUT"
+ chainNamePostrouting = "POSTROUTING"
+)
+
+// DefaultTables returns a default set of tables. Each chain is set to accept
+// all packets.
+func DefaultTables() *IPTables {
+ return &IPTables{
+ Tables: map[string]Table{
+ tablenameNat: Table{
+ BuiltinChains: map[Hook]Chain{
+ Prerouting: unconditionalAcceptChain(chainNamePrerouting),
+ Input: unconditionalAcceptChain(chainNameInput),
+ Output: unconditionalAcceptChain(chainNameOutput),
+ Postrouting: unconditionalAcceptChain(chainNamePostrouting),
+ },
+ DefaultTargets: map[Hook]Target{
+ Prerouting: UnconditionalAcceptTarget{},
+ Input: UnconditionalAcceptTarget{},
+ Output: UnconditionalAcceptTarget{},
+ Postrouting: UnconditionalAcceptTarget{},
+ },
+ UserChains: map[string]Chain{},
+ },
+ tablenameMangle: Table{
+ BuiltinChains: map[Hook]Chain{
+ Prerouting: unconditionalAcceptChain(chainNamePrerouting),
+ Output: unconditionalAcceptChain(chainNameOutput),
+ },
+ DefaultTargets: map[Hook]Target{
+ Prerouting: UnconditionalAcceptTarget{},
+ Output: UnconditionalAcceptTarget{},
+ },
+ UserChains: map[string]Chain{},
+ },
+ },
+ Priorities: map[Hook][]string{
+ Prerouting: []string{tablenameMangle, tablenameNat},
+ Output: []string{tablenameMangle, tablenameNat},
+ },
+ }
+}
+
+func unconditionalAcceptChain(name string) Chain {
+ return Chain{
+ Name: name,
+ Rules: []Rule{
+ Rule{
+ Target: UnconditionalAcceptTarget{},
+ },
+ },
+ }
+}
diff --git a/pkg/tcpip/iptables/targets.go b/pkg/tcpip/iptables/targets.go
new file mode 100644
index 000000000..19a7f77e3
--- /dev/null
+++ b/pkg/tcpip/iptables/targets.go
@@ -0,0 +1,35 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This file contains various Targets.
+
+package iptables
+
+import "gvisor.dev/gvisor/pkg/tcpip/buffer"
+
+// UnconditionalAcceptTarget accepts all packets.
+type UnconditionalAcceptTarget struct{}
+
+// Action implements Target.Action.
+func (UnconditionalAcceptTarget) Action(packet buffer.VectorisedView) (Verdict, string) {
+ return Accept, ""
+}
+
+// UnconditionalDropTarget denies all packets.
+type UnconditionalDropTarget struct{}
+
+// Action implements Target.Action.
+func (UnconditionalDropTarget) Action(packet buffer.VectorisedView) (Verdict, string) {
+ return Drop, ""
+}
diff --git a/pkg/tcpip/iptables/types.go b/pkg/tcpip/iptables/types.go
new file mode 100644
index 000000000..600bd9a10
--- /dev/null
+++ b/pkg/tcpip/iptables/types.go
@@ -0,0 +1,183 @@
+// Copyright 2019 The gVisor authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package iptables
+
+import (
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+)
+
+// A Hook specifies one of the hooks built into the network stack.
+//
+// Userspace app Userspace app
+// ^ |
+// | v
+// [Input] [Output]
+// ^ |
+// | v
+// | routing
+// | |
+// | v
+// ----->[Prerouting]----->routing----->[Forward]---------[Postrouting]----->
+type Hook uint
+
+// These values correspond to values in include/uapi/linux/netfilter.h.
+const (
+ // Prerouting happens before a packet is routed to applications or to
+ // be forwarded.
+ Prerouting Hook = iota
+
+ // Input happens before a packet reaches an application.
+ Input
+
+ // Forward happens once it's decided that a packet should be forwarded
+ // to another host.
+ Forward
+
+ // Output happens after a packet is written by an application to be
+ // sent out.
+ Output
+
+ // Postrouting happens just before a packet goes out on the wire.
+ Postrouting
+
+ // The total number of hooks.
+ NumHooks
+)
+
+// A Verdict is returned by a rule's target to indicate how traversal of rules
+// should (or should not) continue.
+type Verdict int
+
+const (
+ // Accept indicates the packet should continue traversing netstack as
+ // normal.
+ Accept Verdict = iota
+
+ // Drop inicates the packet should be dropped, stopping traversing
+ // netstack.
+ Drop
+
+ // Stolen indicates the packet was co-opted by the target and should
+ // stop traversing netstack.
+ Stolen
+
+ // Queue indicates the packet should be queued for userspace processing.
+ Queue
+
+ // Repeat indicates the packet should re-traverse the chains for the
+ // current hook.
+ Repeat
+
+ // None indicates no verdict was reached.
+ None
+
+ // Jump indicates a jump to another chain.
+ Jump
+
+ // Continue indicates that traversal should continue at the next rule.
+ Continue
+
+ // Return indicates that traversal should return to the calling chain.
+ Return
+)
+
+// IPTables holds all the tables for a netstack.
+type IPTables struct {
+ // Tables maps table names to tables. User tables have arbitrary names.
+ Tables map[string]Table
+
+ // Priorities maps each hook to a list of table names. The order of the
+ // list is the order in which each table should be visited for that
+ // hook.
+ Priorities map[Hook][]string
+}
+
+// A Table defines a set of chains and hooks into the network stack. The
+// currently supported tables are:
+// * nat
+// * mangle
+type Table struct {
+ // BuiltinChains holds the un-deletable chains built into netstack. If
+ // a hook isn't present in the map, this table doesn't utilize that
+ // hook.
+ BuiltinChains map[Hook]Chain
+
+ // DefaultTargets holds a target for each hook that will be executed if
+ // chain traversal doesn't yield a verdict.
+ DefaultTargets map[Hook]Target
+
+ // UserChains holds user-defined chains for the keyed by name. Users
+ // can give their chains arbitrary names.
+ UserChains map[string]Chain
+
+ // Chains maps names to chains for both builtin and user-defined chains.
+ // Its entries point to Chains already either in BuiltinChains or
+ // UserChains, and its purpose is to make looking up tables by name
+ // fast.
+ Chains map[string]*Chain
+}
+
+// ValidHooks returns a bitmap of the builtin hooks for the given table.
+func (table *Table) ValidHooks() (uint32, *tcpip.Error) {
+ hooks := uint32(0)
+ for hook, _ := range table.BuiltinChains {
+ hooks |= 1 << hook
+ }
+ return hooks, nil
+}
+
+// A Chain defines a list of rules for packet processing. When a packet
+// traverses a chain, it is checked against each rule until either a rule
+// returns a verdict or the chain ends.
+//
+// By convention, builtin chains end with a rule that matches everything and
+// returns either Accept or Drop. User-defined chains end with Return. These
+// aren't strictly necessary here, but the iptables tool writes tables this way.
+type Chain struct {
+ // Name is the chain name.
+ Name string
+
+ // Rules is the list of rules to traverse.
+ Rules []Rule
+}
+
+// A Rule is a packet processing rule. It consists of two pieces. First it
+// contains zero or more matchers, each of which is a specification of which
+// packets this rule applies to. If there are no matchers in the rule, it
+// applies to any packet.
+type Rule struct {
+ // Matchers is the list of matchers for this rule.
+ Matchers []Matcher
+
+ // Target is the action to invoke if all the matchers match the packet.
+ Target Target
+}
+
+// A Matcher is the interface for matching packets.
+type Matcher interface {
+ // Match returns whether the packet matches and whether the packet
+ // should be "hotdropped", i.e. dropped immediately. This is usually
+ // used for suspicious packets.
+ Match(hook Hook, packet buffer.VectorisedView, interfaceName string) (matches bool, hotdrop bool)
+}
+
+// A Target is the interface for taking an action for a packet.
+type Target interface {
+ // Action takes an action on the packet and returns a verdict on how
+ // traversal should (or should not) continue. If the return value is
+ // Jump, it also returns the name of the chain to jump to.
+ Action(packet buffer.VectorisedView) (Verdict, string)
+}
diff --git a/pkg/tcpip/link/channel/BUILD b/pkg/tcpip/link/channel/BUILD
index ae285e495..97a794986 100644
--- a/pkg/tcpip/link/channel/BUILD
+++ b/pkg/tcpip/link/channel/BUILD
@@ -5,7 +5,7 @@ package(licenses = ["notice"])
go_library(
name = "channel",
srcs = ["channel.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/link/channel",
+ importpath = "gvisor.dev/gvisor/pkg/tcpip/link/channel",
visibility = ["//:sandbox"],
deps = [
"//pkg/tcpip",
diff --git a/pkg/tcpip/link/channel/channel.go b/pkg/tcpip/link/channel/channel.go
index ee9dd8700..c40744b8e 100644
--- a/pkg/tcpip/link/channel/channel.go
+++ b/pkg/tcpip/link/channel/channel.go
@@ -18,9 +18,9 @@
package channel
import (
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
)
// PacketInfo holds all the information about an outbound packet.
diff --git a/pkg/tcpip/link/fdbased/BUILD b/pkg/tcpip/link/fdbased/BUILD
index cef98c353..d786d8fdf 100644
--- a/pkg/tcpip/link/fdbased/BUILD
+++ b/pkg/tcpip/link/fdbased/BUILD
@@ -12,7 +12,7 @@ go_library(
"mmap_amd64_unsafe.go",
"packet_dispatchers.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/link/fdbased",
+ importpath = "gvisor.dev/gvisor/pkg/tcpip/link/fdbased",
visibility = [
"//visibility:public",
],
diff --git a/pkg/tcpip/link/fdbased/endpoint.go b/pkg/tcpip/link/fdbased/endpoint.go
index b88e2e7bf..77f988b9f 100644
--- a/pkg/tcpip/link/fdbased/endpoint.go
+++ b/pkg/tcpip/link/fdbased/endpoint.go
@@ -44,11 +44,11 @@ import (
"syscall"
"golang.org/x/sys/unix"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/rawfile"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/link/rawfile"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
)
// linkDispatcher reads packets from the link FD and dispatches them to the
diff --git a/pkg/tcpip/link/fdbased/endpoint_test.go b/pkg/tcpip/link/fdbased/endpoint_test.go
index ba3e09192..e305252d6 100644
--- a/pkg/tcpip/link/fdbased/endpoint_test.go
+++ b/pkg/tcpip/link/fdbased/endpoint_test.go
@@ -26,11 +26,11 @@ import (
"time"
"unsafe"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/rawfile"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/link/rawfile"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
)
const (
diff --git a/pkg/tcpip/link/fdbased/mmap.go b/pkg/tcpip/link/fdbased/mmap.go
index 6b7f2a185..fe19c2bc2 100644
--- a/pkg/tcpip/link/fdbased/mmap.go
+++ b/pkg/tcpip/link/fdbased/mmap.go
@@ -16,7 +16,7 @@
package fdbased
-import "gvisor.googlesource.com/gvisor/pkg/tcpip"
+import "gvisor.dev/gvisor/pkg/tcpip"
// Stubbed out version for non-linux/non-amd64 platforms.
diff --git a/pkg/tcpip/link/fdbased/mmap_amd64.go b/pkg/tcpip/link/fdbased/mmap_amd64.go
index 1c2d8c468..8bbb4f9ab 100644
--- a/pkg/tcpip/link/fdbased/mmap_amd64.go
+++ b/pkg/tcpip/link/fdbased/mmap_amd64.go
@@ -21,10 +21,10 @@ import (
"syscall"
"golang.org/x/sys/unix"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/rawfile"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/link/rawfile"
)
const (
diff --git a/pkg/tcpip/link/fdbased/packet_dispatchers.go b/pkg/tcpip/link/fdbased/packet_dispatchers.go
index 1ae0e3359..7ca217e5b 100644
--- a/pkg/tcpip/link/fdbased/packet_dispatchers.go
+++ b/pkg/tcpip/link/fdbased/packet_dispatchers.go
@@ -19,11 +19,11 @@ package fdbased
import (
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/rawfile"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/link/rawfile"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
)
// BufConfig defines the shape of the vectorised view used to read packets from the NIC.
diff --git a/pkg/tcpip/link/loopback/BUILD b/pkg/tcpip/link/loopback/BUILD
index 710a05ede..47a54845c 100644
--- a/pkg/tcpip/link/loopback/BUILD
+++ b/pkg/tcpip/link/loopback/BUILD
@@ -5,7 +5,7 @@ package(licenses = ["notice"])
go_library(
name = "loopback",
srcs = ["loopback.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/link/loopback",
+ importpath = "gvisor.dev/gvisor/pkg/tcpip/link/loopback",
visibility = ["//:sandbox"],
deps = [
"//pkg/tcpip",
diff --git a/pkg/tcpip/link/loopback/loopback.go b/pkg/tcpip/link/loopback/loopback.go
index 2c1148123..ab6a53988 100644
--- a/pkg/tcpip/link/loopback/loopback.go
+++ b/pkg/tcpip/link/loopback/loopback.go
@@ -21,9 +21,9 @@
package loopback
import (
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
)
type endpoint struct {
diff --git a/pkg/tcpip/link/muxed/BUILD b/pkg/tcpip/link/muxed/BUILD
index 84cfae784..ea12ef1ac 100644
--- a/pkg/tcpip/link/muxed/BUILD
+++ b/pkg/tcpip/link/muxed/BUILD
@@ -1,13 +1,11 @@
load("//tools/go_stateify:defs.bzl", "go_library", "go_test")
-package(
- licenses = ["notice"], # Apache 2.0
-)
+package(licenses = ["notice"])
go_library(
name = "muxed",
srcs = ["injectable.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/link/muxed",
+ importpath = "gvisor.dev/gvisor/pkg/tcpip/link/muxed",
visibility = [
"//visibility:public",
],
diff --git a/pkg/tcpip/link/muxed/injectable.go b/pkg/tcpip/link/muxed/injectable.go
index be07b7c29..a577a3d52 100644
--- a/pkg/tcpip/link/muxed/injectable.go
+++ b/pkg/tcpip/link/muxed/injectable.go
@@ -16,9 +16,9 @@
package muxed
import (
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
)
// InjectableEndpoint is an injectable multi endpoint. The endpoint has
diff --git a/pkg/tcpip/link/muxed/injectable_test.go b/pkg/tcpip/link/muxed/injectable_test.go
index 5d40dfacc..174b9330f 100644
--- a/pkg/tcpip/link/muxed/injectable_test.go
+++ b/pkg/tcpip/link/muxed/injectable_test.go
@@ -21,11 +21,11 @@ import (
"syscall"
"testing"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/fdbased"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/link/fdbased"
+ "gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
)
func TestInjectableEndpointRawDispatch(t *testing.T) {
diff --git a/pkg/tcpip/link/rawfile/BUILD b/pkg/tcpip/link/rawfile/BUILD
index f01bb2c07..6e3a7a9d7 100644
--- a/pkg/tcpip/link/rawfile/BUILD
+++ b/pkg/tcpip/link/rawfile/BUILD
@@ -11,7 +11,7 @@ go_library(
"errors.go",
"rawfile_unsafe.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/link/rawfile",
+ importpath = "gvisor.dev/gvisor/pkg/tcpip/link/rawfile",
visibility = [
"//visibility:public",
],
diff --git a/pkg/tcpip/link/rawfile/errors.go b/pkg/tcpip/link/rawfile/errors.go
index 8bde41637..a0a873c84 100644
--- a/pkg/tcpip/link/rawfile/errors.go
+++ b/pkg/tcpip/link/rawfile/errors.go
@@ -20,7 +20,7 @@ import (
"fmt"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip"
)
const maxErrno = 134
@@ -30,7 +30,7 @@ var translations [maxErrno]*tcpip.Error
// TranslateErrno translate an errno from the syscall package into a
// *tcpip.Error.
//
-// Valid, but unreconigized errnos will be translated to
+// Valid, but unrecognized errnos will be translated to
// tcpip.ErrInvalidEndpointState (EINVAL). Panics on invalid errnos.
func TranslateErrno(e syscall.Errno) *tcpip.Error {
if err := translations[e]; err != nil {
diff --git a/pkg/tcpip/link/rawfile/rawfile_unsafe.go b/pkg/tcpip/link/rawfile/rawfile_unsafe.go
index 86db7a487..e3fbb15c2 100644
--- a/pkg/tcpip/link/rawfile/rawfile_unsafe.go
+++ b/pkg/tcpip/link/rawfile/rawfile_unsafe.go
@@ -22,7 +22,7 @@ import (
"syscall"
"unsafe"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip"
)
// GetMTU determines the MTU of a network interface device.
@@ -110,7 +110,7 @@ type PollEvent struct {
// BlockingRead reads from a file descriptor that is set up as non-blocking. If
// no data is available, it will block in a poll() syscall until the file
-// descirptor becomes readable.
+// descriptor becomes readable.
func BlockingRead(fd int, b []byte) (int, *tcpip.Error) {
for {
n, _, e := syscall.RawSyscall(syscall.SYS_READ, uintptr(fd), uintptr(unsafe.Pointer(&b[0])), uintptr(len(b)))
diff --git a/pkg/tcpip/link/sharedmem/BUILD b/pkg/tcpip/link/sharedmem/BUILD
index dc8f1543e..f2998aa98 100644
--- a/pkg/tcpip/link/sharedmem/BUILD
+++ b/pkg/tcpip/link/sharedmem/BUILD
@@ -10,7 +10,7 @@ go_library(
"sharedmem_unsafe.go",
"tx.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sharedmem",
+ importpath = "gvisor.dev/gvisor/pkg/tcpip/link/sharedmem",
visibility = [
"//:sandbox",
],
diff --git a/pkg/tcpip/link/sharedmem/pipe/BUILD b/pkg/tcpip/link/sharedmem/pipe/BUILD
index 85deafa38..94725cb11 100644
--- a/pkg/tcpip/link/sharedmem/pipe/BUILD
+++ b/pkg/tcpip/link/sharedmem/pipe/BUILD
@@ -10,7 +10,7 @@ go_library(
"rx.go",
"tx.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sharedmem/pipe",
+ importpath = "gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/pipe",
visibility = ["//:sandbox"],
)
diff --git a/pkg/tcpip/link/sharedmem/queue/BUILD b/pkg/tcpip/link/sharedmem/queue/BUILD
index d7dc631eb..160a8f864 100644
--- a/pkg/tcpip/link/sharedmem/queue/BUILD
+++ b/pkg/tcpip/link/sharedmem/queue/BUILD
@@ -8,7 +8,7 @@ go_library(
"rx.go",
"tx.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sharedmem/queue",
+ importpath = "gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/queue",
visibility = ["//:sandbox"],
deps = [
"//pkg/log",
diff --git a/pkg/tcpip/link/sharedmem/queue/queue_test.go b/pkg/tcpip/link/sharedmem/queue/queue_test.go
index d3f8f4b8b..9a0aad5d7 100644
--- a/pkg/tcpip/link/sharedmem/queue/queue_test.go
+++ b/pkg/tcpip/link/sharedmem/queue/queue_test.go
@@ -19,7 +19,7 @@ import (
"reflect"
"testing"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sharedmem/pipe"
+ "gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/pipe"
)
func TestBasicTxQueue(t *testing.T) {
diff --git a/pkg/tcpip/link/sharedmem/queue/rx.go b/pkg/tcpip/link/sharedmem/queue/rx.go
index d9aecf2d9..696e6c9e5 100644
--- a/pkg/tcpip/link/sharedmem/queue/rx.go
+++ b/pkg/tcpip/link/sharedmem/queue/rx.go
@@ -20,8 +20,8 @@ import (
"encoding/binary"
"sync/atomic"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sharedmem/pipe"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/pipe"
)
const (
diff --git a/pkg/tcpip/link/sharedmem/queue/tx.go b/pkg/tcpip/link/sharedmem/queue/tx.go
index a24dccd11..beffe807b 100644
--- a/pkg/tcpip/link/sharedmem/queue/tx.go
+++ b/pkg/tcpip/link/sharedmem/queue/tx.go
@@ -17,8 +17,8 @@ package queue
import (
"encoding/binary"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sharedmem/pipe"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/pipe"
)
const (
diff --git a/pkg/tcpip/link/sharedmem/rx.go b/pkg/tcpip/link/sharedmem/rx.go
index 215cb607f..eec11e4cb 100644
--- a/pkg/tcpip/link/sharedmem/rx.go
+++ b/pkg/tcpip/link/sharedmem/rx.go
@@ -20,8 +20,8 @@ import (
"sync/atomic"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/rawfile"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sharedmem/queue"
+ "gvisor.dev/gvisor/pkg/tcpip/link/rawfile"
+ "gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/queue"
)
// rx holds all state associated with an rx queue.
diff --git a/pkg/tcpip/link/sharedmem/sharedmem.go b/pkg/tcpip/link/sharedmem/sharedmem.go
index e34b780f8..834ea5c40 100644
--- a/pkg/tcpip/link/sharedmem/sharedmem.go
+++ b/pkg/tcpip/link/sharedmem/sharedmem.go
@@ -27,12 +27,12 @@ import (
"sync/atomic"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sharedmem/queue"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/queue"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
)
// QueueConfig holds all the file descriptors needed to describe a tx or rx
diff --git a/pkg/tcpip/link/sharedmem/sharedmem_test.go b/pkg/tcpip/link/sharedmem/sharedmem_test.go
index 65b9d7085..98036f367 100644
--- a/pkg/tcpip/link/sharedmem/sharedmem_test.go
+++ b/pkg/tcpip/link/sharedmem/sharedmem_test.go
@@ -27,12 +27,12 @@ import (
"testing"
"time"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sharedmem/pipe"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sharedmem/queue"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/pipe"
+ "gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/queue"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
)
const (
@@ -636,7 +636,7 @@ func TestSimpleReceive(t *testing.T) {
syscall.Write(c.rxCfg.EventFD, []byte{1, 0, 0, 0, 0, 0, 0, 0})
// Wait for packet to be received, then check it.
- c.waitForPackets(1, time.After(time.Second), "Error waiting for packet")
+ c.waitForPackets(1, time.After(5*time.Second), "Timeout waiting for packet")
c.mu.Lock()
rcvd := []byte(c.packets[0].vv.First())
c.packets = c.packets[:0]
diff --git a/pkg/tcpip/link/sharedmem/tx.go b/pkg/tcpip/link/sharedmem/tx.go
index ac3577aa6..6b8d7859d 100644
--- a/pkg/tcpip/link/sharedmem/tx.go
+++ b/pkg/tcpip/link/sharedmem/tx.go
@@ -18,7 +18,7 @@ import (
"math"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sharedmem/queue"
+ "gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/queue"
)
const (
diff --git a/pkg/tcpip/link/sniffer/BUILD b/pkg/tcpip/link/sniffer/BUILD
index 7d0d1781e..1756114e6 100644
--- a/pkg/tcpip/link/sniffer/BUILD
+++ b/pkg/tcpip/link/sniffer/BUILD
@@ -8,7 +8,7 @@ go_library(
"pcap.go",
"sniffer.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sniffer",
+ importpath = "gvisor.dev/gvisor/pkg/tcpip/link/sniffer",
visibility = [
"//visibility:public",
],
diff --git a/pkg/tcpip/link/sniffer/sniffer.go b/pkg/tcpip/link/sniffer/sniffer.go
index 98581e50e..fc584c6a4 100644
--- a/pkg/tcpip/link/sniffer/sniffer.go
+++ b/pkg/tcpip/link/sniffer/sniffer.go
@@ -29,11 +29,11 @@ import (
"sync/atomic"
"time"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
)
// LogPackets is a flag used to enable or disable packet logging via the log
diff --git a/pkg/tcpip/link/tun/BUILD b/pkg/tcpip/link/tun/BUILD
index e54852d3f..92dce8fac 100644
--- a/pkg/tcpip/link/tun/BUILD
+++ b/pkg/tcpip/link/tun/BUILD
@@ -5,7 +5,7 @@ package(licenses = ["notice"])
go_library(
name = "tun",
srcs = ["tun_unsafe.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/link/tun",
+ importpath = "gvisor.dev/gvisor/pkg/tcpip/link/tun",
visibility = [
"//visibility:public",
],
diff --git a/pkg/tcpip/link/waitable/BUILD b/pkg/tcpip/link/waitable/BUILD
index 89a9eee23..2597d4b3e 100644
--- a/pkg/tcpip/link/waitable/BUILD
+++ b/pkg/tcpip/link/waitable/BUILD
@@ -7,7 +7,7 @@ go_library(
srcs = [
"waitable.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/link/waitable",
+ importpath = "gvisor.dev/gvisor/pkg/tcpip/link/waitable",
visibility = [
"//visibility:public",
],
diff --git a/pkg/tcpip/link/waitable/waitable.go b/pkg/tcpip/link/waitable/waitable.go
index 21690a226..3b6ac2ff7 100644
--- a/pkg/tcpip/link/waitable/waitable.go
+++ b/pkg/tcpip/link/waitable/waitable.go
@@ -22,10 +22,10 @@
package waitable
import (
- "gvisor.googlesource.com/gvisor/pkg/gate"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/gate"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
)
// Endpoint is a waitable link-layer endpoint.
diff --git a/pkg/tcpip/link/waitable/waitable_test.go b/pkg/tcpip/link/waitable/waitable_test.go
index 62054fb7f..56e18ecb0 100644
--- a/pkg/tcpip/link/waitable/waitable_test.go
+++ b/pkg/tcpip/link/waitable/waitable_test.go
@@ -17,9 +17,9 @@ package waitable
import (
"testing"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
)
type countedEndpoint struct {
diff --git a/pkg/tcpip/network/arp/BUILD b/pkg/tcpip/network/arp/BUILD
index 2a355e689..d95d44f56 100644
--- a/pkg/tcpip/network/arp/BUILD
+++ b/pkg/tcpip/network/arp/BUILD
@@ -5,7 +5,7 @@ package(licenses = ["notice"])
go_library(
name = "arp",
srcs = ["arp.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/network/arp",
+ importpath = "gvisor.dev/gvisor/pkg/tcpip/network/arp",
visibility = [
"//visibility:public",
],
diff --git a/pkg/tcpip/network/arp/arp.go b/pkg/tcpip/network/arp/arp.go
index a3f2bce3e..ca3d6c0bf 100644
--- a/pkg/tcpip/network/arp/arp.go
+++ b/pkg/tcpip/network/arp/arp.go
@@ -26,10 +26,10 @@
package arp
import (
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
)
const (
diff --git a/pkg/tcpip/network/arp/arp_test.go b/pkg/tcpip/network/arp/arp_test.go
index 1b971b1a3..66c55821b 100644
--- a/pkg/tcpip/network/arp/arp_test.go
+++ b/pkg/tcpip/network/arp/arp_test.go
@@ -18,15 +18,15 @@ import (
"testing"
"time"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/channel"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sniffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/arp"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/icmp"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/link/channel"
+ "gvisor.dev/gvisor/pkg/tcpip/link/sniffer"
+ "gvisor.dev/gvisor/pkg/tcpip/network/arp"
+ "gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/icmp"
)
const (
diff --git a/pkg/tcpip/network/fragmentation/BUILD b/pkg/tcpip/network/fragmentation/BUILD
index bf0a7b99c..118bfc763 100644
--- a/pkg/tcpip/network/fragmentation/BUILD
+++ b/pkg/tcpip/network/fragmentation/BUILD
@@ -23,7 +23,7 @@ go_library(
"reassembler.go",
"reassembler_list.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/network/fragmentation",
+ importpath = "gvisor.dev/gvisor/pkg/tcpip/network/fragmentation",
visibility = ["//:sandbox"],
deps = [
"//pkg/log",
diff --git a/pkg/tcpip/network/fragmentation/frag_heap.go b/pkg/tcpip/network/fragmentation/frag_heap.go
index 9ad3e5a8a..0b570d25a 100644
--- a/pkg/tcpip/network/fragmentation/frag_heap.go
+++ b/pkg/tcpip/network/fragmentation/frag_heap.go
@@ -18,7 +18,7 @@ import (
"container/heap"
"fmt"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
)
type fragment struct {
diff --git a/pkg/tcpip/network/fragmentation/frag_heap_test.go b/pkg/tcpip/network/fragmentation/frag_heap_test.go
index 3a2486ba8..9ececcb9f 100644
--- a/pkg/tcpip/network/fragmentation/frag_heap_test.go
+++ b/pkg/tcpip/network/fragmentation/frag_heap_test.go
@@ -19,7 +19,7 @@ import (
"reflect"
"testing"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
)
var reassambleTestCases = []struct {
diff --git a/pkg/tcpip/network/fragmentation/fragmentation.go b/pkg/tcpip/network/fragmentation/fragmentation.go
index e90edb375..1628a82be 100644
--- a/pkg/tcpip/network/fragmentation/fragmentation.go
+++ b/pkg/tcpip/network/fragmentation/fragmentation.go
@@ -21,7 +21,7 @@ import (
"sync"
"time"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
)
// DefaultReassembleTimeout is based on the linux stack: net.ipv4.ipfrag_time.
@@ -60,7 +60,7 @@ type Fragmentation struct {
// lowMemoryLimit specifies the limit on which we will reach by dropping
// fragments after reaching highMemoryLimit.
//
-// reassemblingTimeout specifes the maximum time allowed to reassemble a packet.
+// reassemblingTimeout specifies the maximum time allowed to reassemble a packet.
// Fragments are lazily evicted only when a new a packet with an
// already existing fragmentation-id arrives after the timeout.
func NewFragmentation(highMemoryLimit, lowMemoryLimit int, reassemblingTimeout time.Duration) *Fragmentation {
@@ -80,7 +80,7 @@ func NewFragmentation(highMemoryLimit, lowMemoryLimit int, reassemblingTimeout t
}
}
-// Process processes an incoming fragment beloning to an ID
+// Process processes an incoming fragment belonging to an ID
// and returns a complete packet when all the packets belonging to that ID have been received.
func (f *Fragmentation) Process(id uint32, first, last uint16, more bool, vv buffer.VectorisedView) (buffer.VectorisedView, bool) {
f.mu.Lock()
diff --git a/pkg/tcpip/network/fragmentation/fragmentation_test.go b/pkg/tcpip/network/fragmentation/fragmentation_test.go
index 99ded68a3..799798544 100644
--- a/pkg/tcpip/network/fragmentation/fragmentation_test.go
+++ b/pkg/tcpip/network/fragmentation/fragmentation_test.go
@@ -19,7 +19,7 @@ import (
"testing"
"time"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
)
// vv is a helper to build VectorisedView from different strings.
diff --git a/pkg/tcpip/network/fragmentation/reassembler.go b/pkg/tcpip/network/fragmentation/reassembler.go
index 04f9ab964..8037f734b 100644
--- a/pkg/tcpip/network/fragmentation/reassembler.go
+++ b/pkg/tcpip/network/fragmentation/reassembler.go
@@ -21,7 +21,7 @@ import (
"sync"
"time"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
)
type hole struct {
diff --git a/pkg/tcpip/network/hash/BUILD b/pkg/tcpip/network/hash/BUILD
index ea520c6ed..e6db5c0b0 100644
--- a/pkg/tcpip/network/hash/BUILD
+++ b/pkg/tcpip/network/hash/BUILD
@@ -5,7 +5,7 @@ package(licenses = ["notice"])
go_library(
name = "hash",
srcs = ["hash.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/network/hash",
+ importpath = "gvisor.dev/gvisor/pkg/tcpip/network/hash",
visibility = ["//visibility:public"],
deps = [
"//pkg/rand",
diff --git a/pkg/tcpip/network/hash/hash.go b/pkg/tcpip/network/hash/hash.go
index 0c91905dc..6a215938b 100644
--- a/pkg/tcpip/network/hash/hash.go
+++ b/pkg/tcpip/network/hash/hash.go
@@ -18,8 +18,8 @@ package hash
import (
"encoding/binary"
- "gvisor.googlesource.com/gvisor/pkg/rand"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/rand"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
)
var hashIV = RandN32(1)[0]
diff --git a/pkg/tcpip/network/ip_test.go b/pkg/tcpip/network/ip_test.go
index 4b822e2c6..db65ee7cc 100644
--- a/pkg/tcpip/network/ip_test.go
+++ b/pkg/tcpip/network/ip_test.go
@@ -17,15 +17,15 @@ package ip_test
import (
"testing"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/loopback"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv6"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/udp"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/link/loopback"
+ "gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
+ "gvisor.dev/gvisor/pkg/tcpip/network/ipv6"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/tcp"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/udp"
)
const (
diff --git a/pkg/tcpip/network/ipv4/BUILD b/pkg/tcpip/network/ipv4/BUILD
index 1b4f29e0c..be84fa63d 100644
--- a/pkg/tcpip/network/ipv4/BUILD
+++ b/pkg/tcpip/network/ipv4/BUILD
@@ -8,7 +8,7 @@ go_library(
"icmp.go",
"ipv4.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4",
+ importpath = "gvisor.dev/gvisor/pkg/tcpip/network/ipv4",
visibility = [
"//visibility:public",
],
diff --git a/pkg/tcpip/network/ipv4/icmp.go b/pkg/tcpip/network/ipv4/icmp.go
index 770f56c3d..bc7f1c42a 100644
--- a/pkg/tcpip/network/ipv4/icmp.go
+++ b/pkg/tcpip/network/ipv4/icmp.go
@@ -17,9 +17,9 @@ package ipv4
import (
"encoding/binary"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
)
// handleControl handles the case when an ICMP packet contains the headers of
diff --git a/pkg/tcpip/network/ipv4/ipv4.go b/pkg/tcpip/network/ipv4/ipv4.go
index 44b1d5b9b..1e3a7425a 100644
--- a/pkg/tcpip/network/ipv4/ipv4.go
+++ b/pkg/tcpip/network/ipv4/ipv4.go
@@ -23,12 +23,12 @@ package ipv4
import (
"sync/atomic"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/fragmentation"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/hash"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/network/fragmentation"
+ "gvisor.dev/gvisor/pkg/tcpip/network/hash"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
)
const (
diff --git a/pkg/tcpip/network/ipv4/ipv4_test.go b/pkg/tcpip/network/ipv4/ipv4_test.go
index 10a939287..3207a3d46 100644
--- a/pkg/tcpip/network/ipv4/ipv4_test.go
+++ b/pkg/tcpip/network/ipv4/ipv4_test.go
@@ -20,16 +20,16 @@ import (
"math/rand"
"testing"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/channel"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sniffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/udp"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/link/channel"
+ "gvisor.dev/gvisor/pkg/tcpip/link/sniffer"
+ "gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/tcp"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/udp"
+ "gvisor.dev/gvisor/pkg/waiter"
)
func TestExcludeBroadcast(t *testing.T) {
diff --git a/pkg/tcpip/network/ipv6/BUILD b/pkg/tcpip/network/ipv6/BUILD
index 247e14e37..fae7f4507 100644
--- a/pkg/tcpip/network/ipv6/BUILD
+++ b/pkg/tcpip/network/ipv6/BUILD
@@ -8,7 +8,7 @@ go_library(
"icmp.go",
"ipv6.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv6",
+ importpath = "gvisor.dev/gvisor/pkg/tcpip/network/ipv6",
visibility = [
"//visibility:public",
],
diff --git a/pkg/tcpip/network/ipv6/icmp.go b/pkg/tcpip/network/ipv6/icmp.go
index 9c011e107..5e6a59e91 100644
--- a/pkg/tcpip/network/ipv6/icmp.go
+++ b/pkg/tcpip/network/ipv6/icmp.go
@@ -17,10 +17,10 @@ package ipv6
import (
"encoding/binary"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
)
// handleControl handles the case when an ICMP packet contains the headers of
diff --git a/pkg/tcpip/network/ipv6/icmp_test.go b/pkg/tcpip/network/ipv6/icmp_test.go
index d8737a616..d46d68e73 100644
--- a/pkg/tcpip/network/ipv6/icmp_test.go
+++ b/pkg/tcpip/network/ipv6/icmp_test.go
@@ -20,14 +20,14 @@ import (
"strings"
"testing"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/channel"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sniffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/icmp"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/link/channel"
+ "gvisor.dev/gvisor/pkg/tcpip/link/sniffer"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/icmp"
+ "gvisor.dev/gvisor/pkg/waiter"
)
const (
diff --git a/pkg/tcpip/network/ipv6/ipv6.go b/pkg/tcpip/network/ipv6/ipv6.go
index bcae98e1f..27367d6c5 100644
--- a/pkg/tcpip/network/ipv6/ipv6.go
+++ b/pkg/tcpip/network/ipv6/ipv6.go
@@ -21,10 +21,10 @@
package ipv6
import (
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
)
const (
diff --git a/pkg/tcpip/ports/BUILD b/pkg/tcpip/ports/BUILD
index 3ee80c62b..989058413 100644
--- a/pkg/tcpip/ports/BUILD
+++ b/pkg/tcpip/ports/BUILD
@@ -5,7 +5,7 @@ package(licenses = ["notice"])
go_library(
name = "ports",
srcs = ["ports.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/ports",
+ importpath = "gvisor.dev/gvisor/pkg/tcpip/ports",
visibility = ["//:sandbox"],
deps = [
"//pkg/tcpip",
diff --git a/pkg/tcpip/ports/ports.go b/pkg/tcpip/ports/ports.go
index a1712b590..315780c0c 100644
--- a/pkg/tcpip/ports/ports.go
+++ b/pkg/tcpip/ports/ports.go
@@ -20,7 +20,7 @@ import (
"math/rand"
"sync"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip"
)
const (
diff --git a/pkg/tcpip/ports/ports_test.go b/pkg/tcpip/ports/ports_test.go
index 8466c661b..689401661 100644
--- a/pkg/tcpip/ports/ports_test.go
+++ b/pkg/tcpip/ports/ports_test.go
@@ -17,7 +17,7 @@ package ports
import (
"testing"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip"
)
const (
diff --git a/pkg/tcpip/sample/tun_tcp_connect/main.go b/pkg/tcpip/sample/tun_tcp_connect/main.go
index 1fa899e7e..3ac381631 100644
--- a/pkg/tcpip/sample/tun_tcp_connect/main.go
+++ b/pkg/tcpip/sample/tun_tcp_connect/main.go
@@ -50,16 +50,16 @@ import (
"strconv"
"time"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/fdbased"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/rawfile"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sniffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/tun"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/link/fdbased"
+ "gvisor.dev/gvisor/pkg/tcpip/link/rawfile"
+ "gvisor.dev/gvisor/pkg/tcpip/link/sniffer"
+ "gvisor.dev/gvisor/pkg/tcpip/link/tun"
+ "gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/tcp"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// writer reads from standard input and writes to the endpoint until standard
diff --git a/pkg/tcpip/sample/tun_tcp_echo/main.go b/pkg/tcpip/sample/tun_tcp_echo/main.go
index d47085581..da425394a 100644
--- a/pkg/tcpip/sample/tun_tcp_echo/main.go
+++ b/pkg/tcpip/sample/tun_tcp_echo/main.go
@@ -29,16 +29,16 @@ import (
"strings"
"time"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/fdbased"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/rawfile"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/tun"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/arp"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv6"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/link/fdbased"
+ "gvisor.dev/gvisor/pkg/tcpip/link/rawfile"
+ "gvisor.dev/gvisor/pkg/tcpip/link/tun"
+ "gvisor.dev/gvisor/pkg/tcpip/network/arp"
+ "gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
+ "gvisor.dev/gvisor/pkg/tcpip/network/ipv6"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/tcp"
+ "gvisor.dev/gvisor/pkg/waiter"
)
var tap = flag.Bool("tap", false, "use tap istead of tun")
diff --git a/pkg/tcpip/seqnum/BUILD b/pkg/tcpip/seqnum/BUILD
index a63665efc..76b5f4ffa 100644
--- a/pkg/tcpip/seqnum/BUILD
+++ b/pkg/tcpip/seqnum/BUILD
@@ -5,7 +5,7 @@ load("//tools/go_stateify:defs.bzl", "go_library")
go_library(
name = "seqnum",
srcs = ["seqnum.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum",
+ importpath = "gvisor.dev/gvisor/pkg/tcpip/seqnum",
visibility = [
"//visibility:public",
],
diff --git a/pkg/tcpip/stack/BUILD b/pkg/tcpip/stack/BUILD
index 551c3c73e..28d11c797 100644
--- a/pkg/tcpip/stack/BUILD
+++ b/pkg/tcpip/stack/BUILD
@@ -13,7 +13,7 @@ go_library(
"stack_global_state.go",
"transport_demuxer.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/stack",
+ importpath = "gvisor.dev/gvisor/pkg/tcpip/stack",
visibility = [
"//visibility:public",
],
diff --git a/pkg/tcpip/stack/linkaddrcache.go b/pkg/tcpip/stack/linkaddrcache.go
index b952ad20f..77bb0ccb9 100644
--- a/pkg/tcpip/stack/linkaddrcache.go
+++ b/pkg/tcpip/stack/linkaddrcache.go
@@ -19,8 +19,8 @@ import (
"sync"
"time"
- "gvisor.googlesource.com/gvisor/pkg/sleep"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/sleep"
+ "gvisor.dev/gvisor/pkg/tcpip"
)
const linkAddrCacheSize = 512 // max cache entries
diff --git a/pkg/tcpip/stack/linkaddrcache_test.go b/pkg/tcpip/stack/linkaddrcache_test.go
index 91b2ffea8..924f4d240 100644
--- a/pkg/tcpip/stack/linkaddrcache_test.go
+++ b/pkg/tcpip/stack/linkaddrcache_test.go
@@ -20,8 +20,8 @@ import (
"testing"
"time"
- "gvisor.googlesource.com/gvisor/pkg/sleep"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/sleep"
+ "gvisor.dev/gvisor/pkg/tcpip"
)
type testaddr struct {
diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go
index 50d35de88..30c0dee42 100644
--- a/pkg/tcpip/stack/nic.go
+++ b/pkg/tcpip/stack/nic.go
@@ -19,10 +19,10 @@ import (
"sync"
"sync/atomic"
- "gvisor.googlesource.com/gvisor/pkg/ilist"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/ilist"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
)
// NIC represents a "network interface card" to which the networking stack is
diff --git a/pkg/tcpip/stack/registration.go b/pkg/tcpip/stack/registration.go
index c70533a35..0ecaa0833 100644
--- a/pkg/tcpip/stack/registration.go
+++ b/pkg/tcpip/stack/registration.go
@@ -17,10 +17,10 @@ package stack
import (
"sync"
- "gvisor.googlesource.com/gvisor/pkg/sleep"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/sleep"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// NetworkEndpointID is the identifier of a network layer protocol endpoint.
diff --git a/pkg/tcpip/stack/route.go b/pkg/tcpip/stack/route.go
index 55ed02479..36d7b6ac7 100644
--- a/pkg/tcpip/stack/route.go
+++ b/pkg/tcpip/stack/route.go
@@ -15,10 +15,10 @@
package stack
import (
- "gvisor.googlesource.com/gvisor/pkg/sleep"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/sleep"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
)
// Route represents a route through the networking stack to a given destination.
diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go
index 9d8e8cda5..2d7f56ca9 100644
--- a/pkg/tcpip/stack/stack.go
+++ b/pkg/tcpip/stack/stack.go
@@ -28,13 +28,13 @@ import (
"sync"
"time"
- "gvisor.googlesource.com/gvisor/pkg/sleep"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/ports"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/sleep"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/ports"
+ "gvisor.dev/gvisor/pkg/tcpip/seqnum"
+ "gvisor.dev/gvisor/pkg/waiter"
)
const (
@@ -225,6 +225,45 @@ type TCPSACKInfo struct {
MaxSACKED seqnum.Value
}
+// RcvBufAutoTuneParams holds state related to TCP receive buffer auto-tuning.
+type RcvBufAutoTuneParams struct {
+ // MeasureTime is the time at which the current measurement
+ // was started.
+ MeasureTime time.Time
+
+ // CopiedBytes is the number of bytes copied to user space since
+ // this measure began.
+ CopiedBytes int
+
+ // PrevCopiedBytes is the number of bytes copied to user space in
+ // the previous RTT period.
+ PrevCopiedBytes int
+
+ // RcvBufSize is the auto tuned receive buffer size.
+ RcvBufSize int
+
+ // RTT is the smoothed RTT as measured by observing the time between
+ // when a byte is first acknowledged and the receipt of data that is at
+ // least one window beyond the sequence number that was acknowledged.
+ RTT time.Duration
+
+ // RTTVar is the "round-trip time variation" as defined in section 2
+ // of RFC6298.
+ RTTVar time.Duration
+
+ // RTTMeasureSeqNumber is the highest acceptable sequence number at the
+ // time this RTT measurement period began.
+ RTTMeasureSeqNumber seqnum.Value
+
+ // RTTMeasureTime is the absolute time at which the current RTT
+ // measurement period began.
+ RTTMeasureTime time.Time
+
+ // Disabled is true if an explicit receive buffer is set for the
+ // endpoint.
+ Disabled bool
+}
+
// TCPEndpointState is a copy of the internal state of a TCP endpoint.
type TCPEndpointState struct {
// ID is a copy of the TransportEndpointID for the endpoint.
@@ -240,6 +279,10 @@ type TCPEndpointState struct {
// buffer for the endpoint.
RcvBufUsed int
+ // RcvBufAutoTuneParams is used to hold state variables to compute
+ // the auto tuned receive buffer size.
+ RcvAutoParams RcvBufAutoTuneParams
+
// RcvClosed if true, indicates the endpoint has been closed for reading.
RcvClosed bool
diff --git a/pkg/tcpip/stack/stack_test.go b/pkg/tcpip/stack/stack_test.go
index 351f63221..69884af03 100644
--- a/pkg/tcpip/stack/stack_test.go
+++ b/pkg/tcpip/stack/stack_test.go
@@ -24,11 +24,11 @@ import (
"strings"
"testing"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/channel"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/link/channel"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
)
const (
diff --git a/pkg/tcpip/stack/transport_demuxer.go b/pkg/tcpip/stack/transport_demuxer.go
index 605bfadeb..cf8a6d129 100644
--- a/pkg/tcpip/stack/transport_demuxer.go
+++ b/pkg/tcpip/stack/transport_demuxer.go
@@ -19,10 +19,10 @@ import (
"math/rand"
"sync"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/hash/jenkins"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/hash/jenkins"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
)
type protocolIDs struct {
diff --git a/pkg/tcpip/stack/transport_test.go b/pkg/tcpip/stack/transport_test.go
index e8a9392b5..788ffcc8c 100644
--- a/pkg/tcpip/stack/transport_test.go
+++ b/pkg/tcpip/stack/transport_test.go
@@ -17,12 +17,12 @@ package stack_test
import (
"testing"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/channel"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/loopback"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/link/channel"
+ "gvisor.dev/gvisor/pkg/tcpip/link/loopback"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/waiter"
)
const (
@@ -192,6 +192,9 @@ func (f *fakeTransportEndpoint) State() uint32 {
return 0
}
+func (f *fakeTransportEndpoint) ModerateRecvBuf(copied int) {
+}
+
type fakeTransportGoodOption bool
type fakeTransportBadOption bool
diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go
index 85ef014d0..c4076666a 100644
--- a/pkg/tcpip/tcpip.go
+++ b/pkg/tcpip/tcpip.go
@@ -38,8 +38,8 @@ import (
"sync/atomic"
"time"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// Error represents an error in the netstack error space. Using a special type
@@ -66,43 +66,44 @@ func (e *Error) IgnoreStats() bool {
// Errors that can be returned by the network stack.
var (
- ErrUnknownProtocol = &Error{msg: "unknown protocol"}
- ErrUnknownNICID = &Error{msg: "unknown nic id"}
- ErrUnknownDevice = &Error{msg: "unknown device"}
- ErrUnknownProtocolOption = &Error{msg: "unknown option for protocol"}
- ErrDuplicateNICID = &Error{msg: "duplicate nic id"}
- ErrDuplicateAddress = &Error{msg: "duplicate address"}
- ErrNoRoute = &Error{msg: "no route"}
- ErrBadLinkEndpoint = &Error{msg: "bad link layer endpoint"}
- ErrAlreadyBound = &Error{msg: "endpoint already bound", ignoreStats: true}
- ErrInvalidEndpointState = &Error{msg: "endpoint is in invalid state"}
- ErrAlreadyConnecting = &Error{msg: "endpoint is already connecting", ignoreStats: true}
- ErrAlreadyConnected = &Error{msg: "endpoint is already connected", ignoreStats: true}
- ErrNoPortAvailable = &Error{msg: "no ports are available"}
- ErrPortInUse = &Error{msg: "port is in use"}
- ErrBadLocalAddress = &Error{msg: "bad local address"}
- ErrClosedForSend = &Error{msg: "endpoint is closed for send"}
- ErrClosedForReceive = &Error{msg: "endpoint is closed for receive"}
- ErrWouldBlock = &Error{msg: "operation would block", ignoreStats: true}
- ErrConnectionRefused = &Error{msg: "connection was refused"}
- ErrTimeout = &Error{msg: "operation timed out"}
- ErrAborted = &Error{msg: "operation aborted"}
- ErrConnectStarted = &Error{msg: "connection attempt started", ignoreStats: true}
- ErrDestinationRequired = &Error{msg: "destination address is required"}
- ErrNotSupported = &Error{msg: "operation not supported"}
- ErrQueueSizeNotSupported = &Error{msg: "queue size querying not supported"}
- ErrNotConnected = &Error{msg: "endpoint not connected"}
- ErrConnectionReset = &Error{msg: "connection reset by peer"}
- ErrConnectionAborted = &Error{msg: "connection aborted"}
- ErrNoSuchFile = &Error{msg: "no such file"}
- ErrInvalidOptionValue = &Error{msg: "invalid option value specified"}
- ErrNoLinkAddress = &Error{msg: "no remote link address"}
- ErrBadAddress = &Error{msg: "bad address"}
- ErrNetworkUnreachable = &Error{msg: "network is unreachable"}
- ErrMessageTooLong = &Error{msg: "message too long"}
- ErrNoBufferSpace = &Error{msg: "no buffer space available"}
- ErrBroadcastDisabled = &Error{msg: "broadcast socket option disabled"}
- ErrNotPermitted = &Error{msg: "operation not permitted"}
+ ErrUnknownProtocol = &Error{msg: "unknown protocol"}
+ ErrUnknownNICID = &Error{msg: "unknown nic id"}
+ ErrUnknownDevice = &Error{msg: "unknown device"}
+ ErrUnknownProtocolOption = &Error{msg: "unknown option for protocol"}
+ ErrDuplicateNICID = &Error{msg: "duplicate nic id"}
+ ErrDuplicateAddress = &Error{msg: "duplicate address"}
+ ErrNoRoute = &Error{msg: "no route"}
+ ErrBadLinkEndpoint = &Error{msg: "bad link layer endpoint"}
+ ErrAlreadyBound = &Error{msg: "endpoint already bound", ignoreStats: true}
+ ErrInvalidEndpointState = &Error{msg: "endpoint is in invalid state"}
+ ErrAlreadyConnecting = &Error{msg: "endpoint is already connecting", ignoreStats: true}
+ ErrAlreadyConnected = &Error{msg: "endpoint is already connected", ignoreStats: true}
+ ErrNoPortAvailable = &Error{msg: "no ports are available"}
+ ErrPortInUse = &Error{msg: "port is in use"}
+ ErrBadLocalAddress = &Error{msg: "bad local address"}
+ ErrClosedForSend = &Error{msg: "endpoint is closed for send"}
+ ErrClosedForReceive = &Error{msg: "endpoint is closed for receive"}
+ ErrWouldBlock = &Error{msg: "operation would block", ignoreStats: true}
+ ErrConnectionRefused = &Error{msg: "connection was refused"}
+ ErrTimeout = &Error{msg: "operation timed out"}
+ ErrAborted = &Error{msg: "operation aborted"}
+ ErrConnectStarted = &Error{msg: "connection attempt started", ignoreStats: true}
+ ErrDestinationRequired = &Error{msg: "destination address is required"}
+ ErrNotSupported = &Error{msg: "operation not supported"}
+ ErrQueueSizeNotSupported = &Error{msg: "queue size querying not supported"}
+ ErrNotConnected = &Error{msg: "endpoint not connected"}
+ ErrConnectionReset = &Error{msg: "connection reset by peer"}
+ ErrConnectionAborted = &Error{msg: "connection aborted"}
+ ErrNoSuchFile = &Error{msg: "no such file"}
+ ErrInvalidOptionValue = &Error{msg: "invalid option value specified"}
+ ErrNoLinkAddress = &Error{msg: "no remote link address"}
+ ErrBadAddress = &Error{msg: "bad address"}
+ ErrNetworkUnreachable = &Error{msg: "network is unreachable"}
+ ErrMessageTooLong = &Error{msg: "message too long"}
+ ErrNoBufferSpace = &Error{msg: "no buffer space available"}
+ ErrBroadcastDisabled = &Error{msg: "broadcast socket option disabled"}
+ ErrNotPermitted = &Error{msg: "operation not permitted"}
+ ErrAddressFamilyNotSupported = &Error{msg: "address family not supported by protocol"}
)
// Errors related to Subnet
@@ -339,6 +340,10 @@ type Endpoint interface {
// get the actual result. The first call to Connect after the socket has
// connected returns nil. Calling connect again results in ErrAlreadyConnected.
// Anything else -- the attempt to connect failed.
+ //
+ // If address.Addr is empty, this means that Enpoint has to be
+ // disconnected if this is supported, otherwise
+ // ErrAddressFamilyNotSupported must be returned.
Connect(address FullAddress) *Error
// Shutdown closes the read and/or write end of the endpoint connection
@@ -381,6 +386,13 @@ type Endpoint interface {
// State returns a socket's lifecycle state. The returned value is
// protocol-specific and is primarily used for diagnostics.
State() uint32
+
+ // ModerateRecvBuf should be called everytime data is copied to the user
+ // space. This allows for dynamic tuning of recv buffer space for a
+ // given socket.
+ //
+ // NOTE: This method is a no-op for sockets other than TCP.
+ ModerateRecvBuf(copied int)
}
// WriteOptions contains options for Endpoint.Write.
@@ -472,6 +484,22 @@ type KeepaliveIntervalOption time.Duration
// closed.
type KeepaliveCountOption int
+// CongestionControlOption is used by SetSockOpt/GetSockOpt to set/get
+// the current congestion control algorithm.
+type CongestionControlOption string
+
+// AvailableCongestionControlOption is used to query the supported congestion
+// control algorithms.
+type AvailableCongestionControlOption string
+
+// ModerateReceiveBufferOption allows the caller to enable/disable TCP receive
+// buffer moderation.
+type ModerateReceiveBufferOption bool
+
+// MaxSegOption is used by SetSockOpt/GetSockOpt to set/get the current
+// Maximum Segment Size(MSS) value as specified using the TCP_MAXSEG option.
+type MaxSegOption int
+
// MulticastTTLOption is used by SetSockOpt/GetSockOpt to control the default
// TTL value for multicast messages. The default is 1.
type MulticastTTLOption uint8
@@ -515,7 +543,7 @@ type BroadcastOption int
// Route is a row in the routing table. It specifies through which NIC (and
// gateway) sets of packets should be routed. A row is considered viable if the
-// masked target address matches the destination adddress in the row.
+// masked target address matches the destination address in the row.
type Route struct {
// Destination is the address that must be matched against the masked
// target address to check if this row is viable.
diff --git a/pkg/tcpip/transport/icmp/BUILD b/pkg/tcpip/transport/icmp/BUILD
index 84a2b53b7..62182a3e6 100644
--- a/pkg/tcpip/transport/icmp/BUILD
+++ b/pkg/tcpip/transport/icmp/BUILD
@@ -23,8 +23,8 @@ go_library(
"icmp_packet_list.go",
"protocol.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/icmp",
- imports = ["gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"],
+ importpath = "gvisor.dev/gvisor/pkg/tcpip/transport/icmp",
+ imports = ["gvisor.dev/gvisor/pkg/tcpip/buffer"],
visibility = ["//visibility:public"],
deps = [
"//pkg/sleep",
diff --git a/pkg/tcpip/transport/icmp/endpoint.go b/pkg/tcpip/transport/icmp/endpoint.go
index b8005093a..ab9e80747 100644
--- a/pkg/tcpip/transport/icmp/endpoint.go
+++ b/pkg/tcpip/transport/icmp/endpoint.go
@@ -18,11 +18,11 @@ import (
"encoding/binary"
"sync"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// +stateify savable
@@ -127,6 +127,9 @@ func (e *endpoint) Close() {
e.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.EventIn | waiter.EventOut)
}
+// ModerateRecvBuf implements tcpip.Endpoint.ModerateRecvBuf.
+func (e *endpoint) ModerateRecvBuf(copied int) {}
+
// Read reads data from the endpoint. This method does not block if
// there is no data pending.
func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) {
@@ -419,6 +422,11 @@ func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error {
e.mu.Lock()
defer e.mu.Unlock()
+ if addr.Addr == "" {
+ // AF_UNSPEC isn't supported.
+ return tcpip.ErrAddressFamilyNotSupported
+ }
+
nicid := addr.NIC
localPort := uint16(0)
switch e.state {
diff --git a/pkg/tcpip/transport/icmp/endpoint_state.go b/pkg/tcpip/transport/icmp/endpoint_state.go
index 332b3cd33..99b8c4093 100644
--- a/pkg/tcpip/transport/icmp/endpoint_state.go
+++ b/pkg/tcpip/transport/icmp/endpoint_state.go
@@ -15,9 +15,9 @@
package icmp
import (
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
)
// saveData saves icmpPacket.data field.
diff --git a/pkg/tcpip/transport/icmp/protocol.go b/pkg/tcpip/transport/icmp/protocol.go
index 954fde9d8..c89538131 100644
--- a/pkg/tcpip/transport/icmp/protocol.go
+++ b/pkg/tcpip/transport/icmp/protocol.go
@@ -26,12 +26,12 @@ import (
"encoding/binary"
"fmt"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/raw"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/raw"
+ "gvisor.dev/gvisor/pkg/waiter"
)
const (
diff --git a/pkg/tcpip/transport/raw/BUILD b/pkg/tcpip/transport/raw/BUILD
index 6d3f0130e..34a14bf7f 100644
--- a/pkg/tcpip/transport/raw/BUILD
+++ b/pkg/tcpip/transport/raw/BUILD
@@ -1,6 +1,4 @@
-package(
- licenses = ["notice"], # Apache 2.0
-)
+package(licenses = ["notice"])
load("//tools/go_generics:defs.bzl", "go_template_instance")
load("//tools/go_stateify:defs.bzl", "go_library")
@@ -24,8 +22,8 @@ go_library(
"endpoint_state.go",
"packet_list.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/raw",
- imports = ["gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"],
+ importpath = "gvisor.dev/gvisor/pkg/tcpip/transport/raw",
+ imports = ["gvisor.dev/gvisor/pkg/tcpip/buffer"],
visibility = ["//visibility:public"],
deps = [
"//pkg/log",
diff --git a/pkg/tcpip/transport/raw/endpoint.go b/pkg/tcpip/transport/raw/endpoint.go
index e4ff50c91..42aded77f 100644
--- a/pkg/tcpip/transport/raw/endpoint.go
+++ b/pkg/tcpip/transport/raw/endpoint.go
@@ -16,7 +16,7 @@
// sockets allow applications to:
//
// * manually write and inspect transport layer headers and payloads
-// * receive all traffic of a given transport protcol (e.g. ICMP or UDP)
+// * receive all traffic of a given transport protocol (e.g. ICMP or UDP)
// * optionally write and inspect network layer and link layer headers for
// packets
//
@@ -29,11 +29,11 @@ package raw
import (
"sync"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// +stateify savable
@@ -147,6 +147,9 @@ func (ep *endpoint) Close() {
ep.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.EventIn | waiter.EventOut)
}
+// ModerateRecvBuf implements tcpip.Endpoint.ModerateRecvBuf.
+func (ep *endpoint) ModerateRecvBuf(copied int) {}
+
// Read implements tcpip.Endpoint.Read.
func (ep *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) {
ep.rcvMu.Lock()
@@ -295,6 +298,11 @@ func (ep *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error {
ep.mu.Lock()
defer ep.mu.Unlock()
+ if addr.Addr == "" {
+ // AF_UNSPEC isn't supported.
+ return tcpip.ErrAddressFamilyNotSupported
+ }
+
if ep.closed {
return tcpip.ErrInvalidEndpointState
}
diff --git a/pkg/tcpip/transport/raw/endpoint_state.go b/pkg/tcpip/transport/raw/endpoint_state.go
index e8907ebb1..cb5534d90 100644
--- a/pkg/tcpip/transport/raw/endpoint_state.go
+++ b/pkg/tcpip/transport/raw/endpoint_state.go
@@ -15,9 +15,9 @@
package raw
import (
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
)
// saveData saves packet.data field.
diff --git a/pkg/tcpip/transport/tcp/BUILD b/pkg/tcpip/transport/tcp/BUILD
index 9db38196b..4cd25e8e2 100644
--- a/pkg/tcpip/transport/tcp/BUILD
+++ b/pkg/tcpip/transport/tcp/BUILD
@@ -21,6 +21,7 @@ go_library(
"accept.go",
"connect.go",
"cubic.go",
+ "cubic_state.go",
"endpoint.go",
"endpoint_state.go",
"forwarder.go",
@@ -38,8 +39,8 @@ go_library(
"tcp_segment_list.go",
"timer.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp",
- imports = ["gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"],
+ importpath = "gvisor.dev/gvisor/pkg/tcpip/transport/tcp",
+ imports = ["gvisor.dev/gvisor/pkg/tcpip/buffer"],
visibility = ["//visibility:public"],
deps = [
"//pkg/rand",
diff --git a/pkg/tcpip/transport/tcp/accept.go b/pkg/tcpip/transport/tcp/accept.go
index d05259c0a..52fd1bfa3 100644
--- a/pkg/tcpip/transport/tcp/accept.go
+++ b/pkg/tcpip/transport/tcp/accept.go
@@ -22,13 +22,13 @@ import (
"sync"
"time"
- "gvisor.googlesource.com/gvisor/pkg/rand"
- "gvisor.googlesource.com/gvisor/pkg/sleep"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/rand"
+ "gvisor.dev/gvisor/pkg/sleep"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/seqnum"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/waiter"
)
const (
@@ -213,6 +213,7 @@ func (l *listenContext) createConnectingEndpoint(s *segment, iss seqnum.Value, i
n.route = s.route.Clone()
n.effectiveNetProtos = []tcpip.NetworkProtocolNumber{s.route.NetProto}
n.rcvBufSize = int(l.rcvWnd)
+ n.amss = mssForRoute(&n.route)
n.maybeEnableTimestamp(rcvdSynOpts)
n.maybeEnableSACKPermitted(rcvdSynOpts)
@@ -232,7 +233,11 @@ func (l *listenContext) createConnectingEndpoint(s *segment, iss seqnum.Value, i
// The receiver at least temporarily has a zero receive window scale,
// but the caller may change it (before starting the protocol loop).
n.snd = newSender(n, iss, irs, s.window, rcvdSynOpts.MSS, rcvdSynOpts.WS)
- n.rcv = newReceiver(n, irs, l.rcvWnd, 0)
+ n.rcv = newReceiver(n, irs, seqnum.Size(n.initialReceiveWindow()), 0, seqnum.Size(n.receiveBufferSize()))
+ // Bootstrap the auto tuning algorithm. Starting at zero will result in
+ // a large step function on the first window adjustment causing the
+ // window to grow to a really large value.
+ n.rcvAutoParams.prevCopied = n.initialReceiveWindow()
return n, nil
}
@@ -249,7 +254,7 @@ func (l *listenContext) createEndpointAndPerformHandshake(s *segment, opts *head
}
// Perform the 3-way handshake.
- h := newHandshake(ep, l.rcvWnd)
+ h := newHandshake(ep, seqnum.Size(ep.initialReceiveWindow()))
h.resetToSynRcvd(cookie, irs, opts)
if err := h.execute(); err != nil {
@@ -359,16 +364,19 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) {
return
}
cookie := ctx.createCookie(s.id, s.sequenceNumber, encodeMSS(opts.MSS))
- // Send SYN with window scaling because we currently
+
+ // Send SYN without window scaling because we currently
// dont't encode this information in the cookie.
//
// Enable Timestamp option if the original syn did have
// the timestamp option specified.
+ mss := mssForRoute(&s.route)
synOpts := header.TCPSynOptions{
WS: -1,
TS: opts.TS,
TSVal: tcpTimeStamp(timeStampOffset()),
TSEcr: opts.TSVal,
+ MSS: uint16(mss),
}
sendSynTCP(&s.route, s.id, header.TCPFlagSyn|header.TCPFlagAck, cookie, s.sequenceNumber+1, ctx.rcvWnd, synOpts)
e.stack.Stats().TCP.ListenOverflowSynCookieSent.Increment()
diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go
index dd671f7ce..00d2ae524 100644
--- a/pkg/tcpip/transport/tcp/connect.go
+++ b/pkg/tcpip/transport/tcp/connect.go
@@ -18,14 +18,14 @@ import (
"sync"
"time"
- "gvisor.googlesource.com/gvisor/pkg/rand"
- "gvisor.googlesource.com/gvisor/pkg/sleep"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/rand"
+ "gvisor.dev/gvisor/pkg/sleep"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/seqnum"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// maxSegmentsPerWake is the maximum number of segments to process in the main
@@ -78,6 +78,9 @@ type handshake struct {
// mss is the maximum segment size received from the peer.
mss uint16
+ // amss is the maximum segment size advertised by us to the peer.
+ amss uint16
+
// sndWndScale is the send window scale, as defined in RFC 1323. A
// negative value means no scaling is supported by the peer.
sndWndScale int
@@ -87,11 +90,24 @@ type handshake struct {
}
func newHandshake(ep *endpoint, rcvWnd seqnum.Size) handshake {
+ rcvWndScale := ep.rcvWndScaleForHandshake()
+
+ // Round-down the rcvWnd to a multiple of wndScale. This ensures that the
+ // window offered in SYN won't be reduced due to the loss of precision if
+ // window scaling is enabled after the handshake.
+ rcvWnd = (rcvWnd >> uint8(rcvWndScale)) << uint8(rcvWndScale)
+
+ // Ensure we can always accept at least 1 byte if the scale specified
+ // was too high for the provided rcvWnd.
+ if rcvWnd == 0 {
+ rcvWnd = 1
+ }
+
h := handshake{
ep: ep,
active: true,
rcvWnd: rcvWnd,
- rcvWndScale: FindWndScale(rcvWnd),
+ rcvWndScale: int(rcvWndScale),
}
h.resetState()
return h
@@ -224,7 +240,7 @@ func (h *handshake) synSentState(s *segment) *tcpip.Error {
h.ep.state = StateSynRecv
h.ep.mu.Unlock()
synOpts := header.TCPSynOptions{
- WS: h.rcvWndScale,
+ WS: int(h.effectiveRcvWndScale()),
TS: rcvSynOpts.TS,
TSVal: h.ep.timestamp(),
TSEcr: h.ep.recentTS,
@@ -233,6 +249,7 @@ func (h *handshake) synSentState(s *segment) *tcpip.Error {
// permits SACK. This is not explicitly defined in the RFC but
// this is the behaviour implemented by Linux.
SACKPermitted: rcvSynOpts.SACKPermitted,
+ MSS: h.ep.amss,
}
sendSynTCP(&s.route, h.ep.id, h.flags, h.iss, h.ackNum, h.rcvWnd, synOpts)
@@ -277,6 +294,7 @@ func (h *handshake) synRcvdState(s *segment) *tcpip.Error {
TSVal: h.ep.timestamp(),
TSEcr: h.ep.recentTS,
SACKPermitted: h.ep.sackPermitted,
+ MSS: h.ep.amss,
}
sendSynTCP(&s.route, h.ep.id, h.flags, h.iss, h.ackNum, h.rcvWnd, synOpts)
return nil
@@ -419,12 +437,15 @@ func (h *handshake) execute() *tcpip.Error {
// Send the initial SYN segment and loop until the handshake is
// completed.
+ h.ep.amss = mssForRoute(&h.ep.route)
+
synOpts := header.TCPSynOptions{
WS: h.rcvWndScale,
TS: true,
TSVal: h.ep.timestamp(),
TSEcr: h.ep.recentTS,
SACKPermitted: bool(sackEnabled),
+ MSS: h.ep.amss,
}
// Execute is also called in a listen context so we want to make sure we
@@ -433,6 +454,11 @@ func (h *handshake) execute() *tcpip.Error {
if h.state == handshakeSynRcvd {
synOpts.TS = h.ep.sendTSOk
synOpts.SACKPermitted = h.ep.sackPermitted && bool(sackEnabled)
+ if h.sndWndScale < 0 {
+ // Disable window scaling if the peer did not send us
+ // the window scaling option.
+ synOpts.WS = -1
+ }
}
sendSynTCP(&h.ep.route, h.ep.id, h.flags, h.iss, h.ackNum, h.rcvWnd, synOpts)
for h.state != handshakeCompleted {
@@ -554,13 +580,6 @@ func makeSynOptions(opts header.TCPSynOptions) []byte {
}
func sendSynTCP(r *stack.Route, id stack.TransportEndpointID, flags byte, seq, ack seqnum.Value, rcvWnd seqnum.Size, opts header.TCPSynOptions) *tcpip.Error {
- // The MSS in opts is automatically calculated as this function is
- // called from many places and we don't want every call point being
- // embedded with the MSS calculation.
- if opts.MSS == 0 {
- opts.MSS = uint16(r.MTU() - header.TCPMinimumSize)
- }
-
options := makeSynOptions(opts)
err := sendTCP(r, id, buffer.VectorisedView{}, r.DefaultTTL(), flags, seq, ack, rcvWnd, options, nil)
putOptions(options)
@@ -861,7 +880,8 @@ func (e *endpoint) protocolMainLoop(handshake bool) *tcpip.Error {
// This is an active connection, so we must initiate the 3-way
// handshake, and then inform potential waiters about its
// completion.
- h := newHandshake(e, seqnum.Size(e.receiveBufferAvailable()))
+ initialRcvWnd := e.initialReceiveWindow()
+ h := newHandshake(e, seqnum.Size(initialRcvWnd))
e.mu.Lock()
h.ep.state = StateSynSent
e.mu.Unlock()
@@ -886,8 +906,14 @@ func (e *endpoint) protocolMainLoop(handshake bool) *tcpip.Error {
// (indicated by a negative send window scale).
e.snd = newSender(e, h.iss, h.ackNum-1, h.sndWnd, h.mss, h.sndWndScale)
+ rcvBufSize := seqnum.Size(e.receiveBufferSize())
e.rcvListMu.Lock()
- e.rcv = newReceiver(e, h.ackNum-1, h.rcvWnd, h.effectiveRcvWndScale())
+ e.rcv = newReceiver(e, h.ackNum-1, h.rcvWnd, h.effectiveRcvWndScale(), rcvBufSize)
+ // boot strap the auto tuning algorithm. Starting at zero will
+ // result in a large step function on the first proper causing
+ // the window to just go to a really large value after the first
+ // RTT itself.
+ e.rcvAutoParams.prevCopied = initialRcvWnd
e.rcvListMu.Unlock()
}
diff --git a/pkg/tcpip/transport/tcp/cubic.go b/pkg/tcpip/transport/tcp/cubic.go
index e618cd2b9..7b1f5e763 100644
--- a/pkg/tcpip/transport/tcp/cubic.go
+++ b/pkg/tcpip/transport/tcp/cubic.go
@@ -23,6 +23,7 @@ import (
// control algorithm state.
//
// See: https://tools.ietf.org/html/rfc8312.
+// +stateify savable
type cubicState struct {
// wLastMax is the previous wMax value.
wLastMax float64
@@ -33,7 +34,7 @@ type cubicState struct {
// t denotes the time when the current congestion avoidance
// was entered.
- t time.Time
+ t time.Time `state:".(unixTime)"`
// numCongestionEvents tracks the number of congestion events since last
// RTO.
diff --git a/pkg/tcpip/transport/tcp/cubic_state.go b/pkg/tcpip/transport/tcp/cubic_state.go
new file mode 100644
index 000000000..d0f58cfaf
--- /dev/null
+++ b/pkg/tcpip/transport/tcp/cubic_state.go
@@ -0,0 +1,29 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tcp
+
+import (
+ "time"
+)
+
+// saveT is invoked by stateify.
+func (c *cubicState) saveT() unixTime {
+ return unixTime{c.t.Unix(), c.t.UnixNano()}
+}
+
+// loadT is invoked by stateify.
+func (c *cubicState) loadT(unix unixTime) {
+ c.t = time.Unix(unix.second, unix.nano)
+}
diff --git a/pkg/tcpip/transport/tcp/dual_stack_test.go b/pkg/tcpip/transport/tcp/dual_stack_test.go
index 43bcfa070..d9f79e8c5 100644
--- a/pkg/tcpip/transport/tcp/dual_stack_test.go
+++ b/pkg/tcpip/transport/tcp/dual_stack_test.go
@@ -18,15 +18,15 @@ import (
"testing"
"time"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/checker"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp/testing/context"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/checker"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
+ "gvisor.dev/gvisor/pkg/tcpip/seqnum"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/tcp"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/tcp/testing/context"
+ "gvisor.dev/gvisor/pkg/waiter"
)
func TestV4MappedConnectOnV6Only(t *testing.T) {
diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go
index 23422ca5e..beb90afb5 100644
--- a/pkg/tcpip/transport/tcp/endpoint.go
+++ b/pkg/tcpip/transport/tcp/endpoint.go
@@ -17,19 +17,20 @@ package tcp
import (
"fmt"
"math"
+ "strings"
"sync"
"sync/atomic"
"time"
- "gvisor.googlesource.com/gvisor/pkg/rand"
- "gvisor.googlesource.com/gvisor/pkg/sleep"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
- "gvisor.googlesource.com/gvisor/pkg/tmutex"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/rand"
+ "gvisor.dev/gvisor/pkg/sleep"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/seqnum"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tmutex"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// EndpointState represents the state of a TCP endpoint.
@@ -116,6 +117,7 @@ const (
notifyDrain
notifyReset
notifyKeepaliveChanged
+ notifyMSSChanged
)
// SACKInfo holds TCP SACK related information for a given endpoint.
@@ -131,6 +133,42 @@ type SACKInfo struct {
NumBlocks int
}
+// rcvBufAutoTuneParams are used to hold state variables to compute
+// the auto tuned recv buffer size.
+//
+// +stateify savable
+type rcvBufAutoTuneParams struct {
+ // measureTime is the time at which the current measurement
+ // was started.
+ measureTime time.Time `state:".(unixTime)"`
+
+ // copied is the number of bytes copied out of the receive
+ // buffers since this measure began.
+ copied int
+
+ // prevCopied is the number of bytes copied out of the receive
+ // buffers in the previous RTT period.
+ prevCopied int
+
+ // rtt is the non-smoothed minimum RTT as measured by observing the time
+ // between when a byte is first acknowledged and the receipt of data
+ // that is at least one window beyond the sequence number that was
+ // acknowledged.
+ rtt time.Duration
+
+ // rttMeasureSeqNumber is the highest acceptable sequence number at the
+ // time this RTT measurement period began.
+ rttMeasureSeqNumber seqnum.Value
+
+ // rttMeasureTime is the absolute time at which the current rtt
+ // measurement period began.
+ rttMeasureTime time.Time `state:".(unixTime)"`
+
+ // disabled is true if an explicit receive buffer is set for the
+ // endpoint.
+ disabled bool
+}
+
// endpoint represents a TCP endpoint. This struct serves as the interface
// between users of the endpoint and the protocol implementation; it is legal to
// have concurrent goroutines make calls into the endpoint, they are properly
@@ -164,18 +202,23 @@ type endpoint struct {
// to indicate to users that no more data is coming.
//
// rcvListMu can be taken after the endpoint mu below.
- rcvListMu sync.Mutex `state:"nosave"`
- rcvList segmentList `state:"wait"`
- rcvClosed bool
- rcvBufSize int
- rcvBufUsed int
+ rcvListMu sync.Mutex `state:"nosave"`
+ rcvList segmentList `state:"wait"`
+ rcvClosed bool
+ rcvBufSize int
+ rcvBufUsed int
+ rcvAutoParams rcvBufAutoTuneParams
+ // zeroWindow indicates that the window was closed due to receive buffer
+ // space being filled up. This is set by the worker goroutine before
+ // moving a segment to the rcvList. This setting is cleared by the
+ // endpoint when a Read() call reads enough data for the new window to
+ // be non-zero.
+ zeroWindow bool
// The following fields are protected by the mutex.
mu sync.RWMutex `state:"nosave"`
id stack.TransportEndpointID
- // state endpointState `state:".(endpointState)"`
- // pState ProtocolState
state EndpointState `state:".(EndpointState)"`
isPortReserved bool `state:"manual"`
@@ -269,6 +312,10 @@ type endpoint struct {
// in SYN-RCVD state.
synRcvdCount int
+ // userMSS if non-zero is the MSS value explicitly set by the user
+ // for this endpoint using the TCP_MAXSEG setsockopt.
+ userMSS int
+
// The following fields are used to manage the send buffer. When
// segments are ready to be sent, they are added to sndQueue and the
// protocol goroutine is signaled via sndWaker.
@@ -286,7 +333,7 @@ type endpoint struct {
// cc stores the name of the Congestion Control algorithm to use for
// this endpoint.
- cc CongestionControlOption
+ cc tcpip.CongestionControlOption
// The following are used when a "packet too big" control packet is
// received. They are protected by sndBufMu. They are used to
@@ -338,6 +385,9 @@ type endpoint struct {
bindAddress tcpip.Address
connectingAddress tcpip.Address
+ // amss is the advertised MSS to the peer by this endpoint.
+ amss uint16
+
gso *stack.GSO
}
@@ -372,8 +422,8 @@ func newEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, waite
netProto: netProto,
waiterQueue: waiterQueue,
state: StateInitial,
- rcvBufSize: DefaultBufferSize,
- sndBufSize: DefaultBufferSize,
+ rcvBufSize: DefaultReceiveBufferSize,
+ sndBufSize: DefaultSendBufferSize,
sndMTU: int(math.MaxInt32),
reuseAddr: true,
keepalive: keepalive{
@@ -394,11 +444,16 @@ func newEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, waite
e.rcvBufSize = rs.Default
}
- var cs CongestionControlOption
+ var cs tcpip.CongestionControlOption
if err := stack.TransportProtocolOption(ProtocolNumber, &cs); err == nil {
e.cc = cs
}
+ var mrb tcpip.ModerateReceiveBufferOption
+ if err := stack.TransportProtocolOption(ProtocolNumber, &mrb); err == nil {
+ e.rcvAutoParams.disabled = !bool(mrb)
+ }
+
if p := stack.GetTCPProbe(); p != nil {
e.probe = p
}
@@ -407,6 +462,7 @@ func newEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, waite
e.workMu.Init()
e.workMu.Lock()
e.tsOffset = timeStampOffset()
+
return e
}
@@ -550,6 +606,83 @@ func (e *endpoint) cleanupLocked() {
tcpip.DeleteDanglingEndpoint(e)
}
+// initialReceiveWindow returns the initial receive window to advertise in the
+// SYN/SYN-ACK.
+func (e *endpoint) initialReceiveWindow() int {
+ rcvWnd := e.receiveBufferAvailable()
+ if rcvWnd > math.MaxUint16 {
+ rcvWnd = math.MaxUint16
+ }
+ routeWnd := InitialCwnd * int(mssForRoute(&e.route)) * 2
+ if rcvWnd > routeWnd {
+ rcvWnd = routeWnd
+ }
+ return rcvWnd
+}
+
+// ModerateRecvBuf adjusts the receive buffer and the advertised window
+// based on the number of bytes copied to user space.
+func (e *endpoint) ModerateRecvBuf(copied int) {
+ e.rcvListMu.Lock()
+ if e.rcvAutoParams.disabled {
+ e.rcvListMu.Unlock()
+ return
+ }
+ now := time.Now()
+ if rtt := e.rcvAutoParams.rtt; rtt == 0 || now.Sub(e.rcvAutoParams.measureTime) < rtt {
+ e.rcvAutoParams.copied += copied
+ e.rcvListMu.Unlock()
+ return
+ }
+ prevRTTCopied := e.rcvAutoParams.copied + copied
+ prevCopied := e.rcvAutoParams.prevCopied
+ rcvWnd := 0
+ if prevRTTCopied > prevCopied {
+ // The minimal receive window based on what was copied by the app
+ // in the immediate preceding RTT and some extra buffer for 16
+ // segments to account for variations.
+ // We multiply by 2 to account for packet losses.
+ rcvWnd = prevRTTCopied*2 + 16*int(e.amss)
+
+ // Scale for slow start based on bytes copied in this RTT vs previous.
+ grow := (rcvWnd * (prevRTTCopied - prevCopied)) / prevCopied
+
+ // Multiply growth factor by 2 again to account for sender being
+ // in slow-start where the sender grows it's congestion window
+ // by 100% per RTT.
+ rcvWnd += grow * 2
+
+ // Make sure auto tuned buffer size can always receive upto 2x
+ // the initial window of 10 segments.
+ if minRcvWnd := int(e.amss) * InitialCwnd * 2; rcvWnd < minRcvWnd {
+ rcvWnd = minRcvWnd
+ }
+
+ // Cap the auto tuned buffer size by the maximum permissible
+ // receive buffer size.
+ if max := e.maxReceiveBufferSize(); rcvWnd > max {
+ rcvWnd = max
+ }
+
+ // We do not adjust downwards as that can cause the receiver to
+ // reject valid data that might already be in flight as the
+ // acceptable window will shrink.
+ if rcvWnd > e.rcvBufSize {
+ e.rcvBufSize = rcvWnd
+ e.notifyProtocolGoroutine(notifyReceiveWindowChanged)
+ }
+
+ // We only update prevCopied when we grow the buffer because in cases
+ // where prevCopied > prevRTTCopied the existing buffer is already big
+ // enough to handle the current rate and we don't need to do any
+ // adjustments.
+ e.rcvAutoParams.prevCopied = prevRTTCopied
+ }
+ e.rcvAutoParams.measureTime = now
+ e.rcvAutoParams.copied = 0
+ e.rcvListMu.Unlock()
+}
+
// Read reads data from the endpoint.
func (e *endpoint) Read(*tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) {
e.mu.RLock()
@@ -595,10 +728,12 @@ func (e *endpoint) readLocked() (buffer.View, *tcpip.Error) {
s.decRef()
}
- scale := e.rcv.rcvWndScale
- wasZero := e.zeroReceiveWindow(scale)
e.rcvBufUsed -= len(v)
- if wasZero && !e.zeroReceiveWindow(scale) {
+ // If the window was zero before this read and if the read freed up
+ // enough buffer space for the scaled window to be non-zero then notify
+ // the protocol goroutine to send a window update.
+ if e.zeroWindow && !e.zeroReceiveWindow(e.rcv.rcvWndScale) {
+ e.zeroWindow = false
e.notifyProtocolGoroutine(notifyNonZeroReceiveWindow)
}
@@ -785,6 +920,17 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error {
}
return nil
+ case tcpip.MaxSegOption:
+ userMSS := v
+ if userMSS < header.TCPMinimumMSS || userMSS > header.TCPMaximumMSS {
+ return tcpip.ErrInvalidOptionValue
+ }
+ e.mu.Lock()
+ e.userMSS = int(userMSS)
+ e.mu.Unlock()
+ e.notifyProtocolGoroutine(notifyMSSChanged)
+ return nil
+
case tcpip.ReceiveBufferSizeOption:
// Make sure the receive buffer size is within the min and max
// allowed.
@@ -818,9 +964,10 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error {
size = math.MaxInt32 / 2
}
- wasZero := e.zeroReceiveWindow(scale)
e.rcvBufSize = size
- if wasZero && !e.zeroReceiveWindow(scale) {
+ e.rcvAutoParams.disabled = true
+ if e.zeroWindow && !e.zeroReceiveWindow(scale) {
+ e.zeroWindow = false
mask |= notifyNonZeroReceiveWindow
}
e.rcvListMu.Unlock()
@@ -898,6 +1045,40 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error {
e.mu.Unlock()
return nil
+ case tcpip.CongestionControlOption:
+ // Query the available cc algorithms in the stack and
+ // validate that the specified algorithm is actually
+ // supported in the stack.
+ var avail tcpip.AvailableCongestionControlOption
+ if err := e.stack.TransportProtocolOption(ProtocolNumber, &avail); err != nil {
+ return err
+ }
+ availCC := strings.Split(string(avail), " ")
+ for _, cc := range availCC {
+ if v == tcpip.CongestionControlOption(cc) {
+ // Acquire the work mutex as we may need to
+ // reinitialize the congestion control state.
+ e.mu.Lock()
+ state := e.state
+ e.cc = v
+ e.mu.Unlock()
+ switch state {
+ case StateEstablished:
+ e.workMu.Lock()
+ e.mu.Lock()
+ if e.state == state {
+ e.snd.cc = e.snd.initCongestionControl(e.cc)
+ }
+ e.mu.Unlock()
+ e.workMu.Unlock()
+ }
+ return nil
+ }
+ }
+
+ // Linux returns ENOENT when an invalid congestion
+ // control algorithm is specified.
+ return tcpip.ErrNoSuchFile
default:
return nil
}
@@ -929,6 +1110,14 @@ func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error {
e.lastErrorMu.Unlock()
return err
+ case *tcpip.MaxSegOption:
+ // This is just stubbed out. Linux never returns the user_mss
+ // value as it either returns the defaultMSS or returns the
+ // actual current MSS. Netstack just returns the defaultMSS
+ // always for now.
+ *o = header.TCPDefaultMSS
+ return nil
+
case *tcpip.SendBufferSizeOption:
e.sndBufMu.Lock()
*o = tcpip.SendBufferSizeOption(e.sndBufSize)
@@ -1067,6 +1256,12 @@ func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error {
}
return nil
+ case *tcpip.CongestionControlOption:
+ e.mu.Lock()
+ *o = e.cc
+ e.mu.Unlock()
+ return nil
+
default:
return tcpip.ErrUnknownProtocolOption
}
@@ -1098,6 +1293,11 @@ func (e *endpoint) checkV4Mapped(addr *tcpip.FullAddress) (tcpip.NetworkProtocol
// Connect connects the endpoint to its peer.
func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error {
+ if addr.Addr == "" && addr.Port == 0 {
+ // AF_UNSPEC isn't supported.
+ return tcpip.ErrAddressFamilyNotSupported
+ }
+
return e.connect(addr, true, true)
}
@@ -1582,6 +1782,13 @@ func (e *endpoint) readyToRead(s *segment) {
if s != nil {
s.incRef()
e.rcvBufUsed += s.data.Size()
+ // Check if the receive window is now closed. If so make sure
+ // we set the zero window before we deliver the segment to ensure
+ // that a subsequent read of the segment will correctly trigger
+ // a non-zero notification.
+ if avail := e.receiveBufferAvailableLocked(); avail>>e.rcv.rcvWndScale == 0 {
+ e.zeroWindow = true
+ }
e.rcvList.PushBack(s)
} else {
e.rcvClosed = true
@@ -1591,21 +1798,26 @@ func (e *endpoint) readyToRead(s *segment) {
e.waiterQueue.Notify(waiter.EventIn)
}
-// receiveBufferAvailable calculates how many bytes are still available in the
-// receive buffer.
-func (e *endpoint) receiveBufferAvailable() int {
- e.rcvListMu.Lock()
- size := e.rcvBufSize
- used := e.rcvBufUsed
- e.rcvListMu.Unlock()
-
+// receiveBufferAvailableLocked calculates how many bytes are still available
+// in the receive buffer.
+// rcvListMu must be held when this function is called.
+func (e *endpoint) receiveBufferAvailableLocked() int {
// We may use more bytes than the buffer size when the receive buffer
// shrinks.
- if used >= size {
+ if e.rcvBufUsed >= e.rcvBufSize {
return 0
}
- return size - used
+ return e.rcvBufSize - e.rcvBufUsed
+}
+
+// receiveBufferAvailable calculates how many bytes are still available in the
+// receive buffer.
+func (e *endpoint) receiveBufferAvailable() int {
+ e.rcvListMu.Lock()
+ available := e.receiveBufferAvailableLocked()
+ e.rcvListMu.Unlock()
+ return available
}
func (e *endpoint) receiveBufferSize() int {
@@ -1616,6 +1828,33 @@ func (e *endpoint) receiveBufferSize() int {
return size
}
+func (e *endpoint) maxReceiveBufferSize() int {
+ var rs ReceiveBufferSizeOption
+ if err := e.stack.TransportProtocolOption(ProtocolNumber, &rs); err != nil {
+ // As a fallback return the hardcoded max buffer size.
+ return MaxBufferSize
+ }
+ return rs.Max
+}
+
+// rcvWndScaleForHandshake computes the receive window scale to offer to the
+// peer when window scaling is enabled (true by default). If auto-tuning is
+// disabled then the window scaling factor is based on the size of the
+// receiveBuffer otherwise we use the max permissible receive buffer size to
+// compute the scale.
+func (e *endpoint) rcvWndScaleForHandshake() int {
+ bufSizeForScale := e.receiveBufferSize()
+
+ e.rcvListMu.Lock()
+ autoTuningDisabled := e.rcvAutoParams.disabled
+ e.rcvListMu.Unlock()
+ if autoTuningDisabled {
+ return FindWndScale(seqnum.Size(bufSizeForScale))
+ }
+
+ return FindWndScale(seqnum.Size(e.maxReceiveBufferSize()))
+}
+
// updateRecentTimestamp updates the recent timestamp using the algorithm
// described in https://tools.ietf.org/html/rfc7323#section-4.3
func (e *endpoint) updateRecentTimestamp(tsVal uint32, maxSentAck seqnum.Value, segSeq seqnum.Value) {
@@ -1708,6 +1947,13 @@ func (e *endpoint) completeState() stack.TCPEndpointState {
s.RcvBufSize = e.rcvBufSize
s.RcvBufUsed = e.rcvBufUsed
s.RcvClosed = e.rcvClosed
+ s.RcvAutoParams.MeasureTime = e.rcvAutoParams.measureTime
+ s.RcvAutoParams.CopiedBytes = e.rcvAutoParams.copied
+ s.RcvAutoParams.PrevCopiedBytes = e.rcvAutoParams.prevCopied
+ s.RcvAutoParams.RTT = e.rcvAutoParams.rtt
+ s.RcvAutoParams.RTTMeasureSeqNumber = e.rcvAutoParams.rttMeasureSeqNumber
+ s.RcvAutoParams.RTTMeasureTime = e.rcvAutoParams.rttMeasureTime
+ s.RcvAutoParams.Disabled = e.rcvAutoParams.disabled
e.rcvListMu.Unlock()
// Endpoint TCP Option state.
@@ -1761,13 +2007,13 @@ func (e *endpoint) completeState() stack.TCPEndpointState {
RTTMeasureTime: e.snd.rttMeasureTime,
Closed: e.snd.closed,
RTO: e.snd.rto,
- SRTTInited: e.snd.srttInited,
MaxPayloadSize: e.snd.maxPayloadSize,
SndWndScale: e.snd.sndWndScale,
MaxSentAck: e.snd.maxSentAck,
}
e.snd.rtt.Lock()
s.Sender.SRTT = e.snd.rtt.srtt
+ s.Sender.SRTTInited = e.snd.rtt.srttInited
e.snd.rtt.Unlock()
if cubic, ok := e.snd.cc.(*cubicState); ok {
@@ -1815,3 +2061,7 @@ func (e *endpoint) State() uint32 {
defer e.mu.Unlock()
return uint32(e.state)
}
+
+func mssForRoute(r *stack.Route) uint16 {
+ return uint16(r.MTU() - header.TCPMinimumSize)
+}
diff --git a/pkg/tcpip/transport/tcp/endpoint_state.go b/pkg/tcpip/transport/tcp/endpoint_state.go
index 5f30c2374..b93959034 100644
--- a/pkg/tcpip/transport/tcp/endpoint_state.go
+++ b/pkg/tcpip/transport/tcp/endpoint_state.go
@@ -19,9 +19,9 @@ import (
"sync"
"time"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
)
func (e *endpoint) drainSegmentLocked() {
@@ -342,6 +342,7 @@ func loadError(s string) *tcpip.Error {
tcpip.ErrNoBufferSpace,
tcpip.ErrBroadcastDisabled,
tcpip.ErrNotPermitted,
+ tcpip.ErrAddressFamilyNotSupported,
}
messageToError = make(map[string]*tcpip.Error)
@@ -360,3 +361,23 @@ func loadError(s string) *tcpip.Error {
return e
}
+
+// saveMeasureTime is invoked by stateify.
+func (r *rcvBufAutoTuneParams) saveMeasureTime() unixTime {
+ return unixTime{r.measureTime.Unix(), r.measureTime.UnixNano()}
+}
+
+// loadMeasureTime is invoked by stateify.
+func (r *rcvBufAutoTuneParams) loadMeasureTime(unix unixTime) {
+ r.measureTime = time.Unix(unix.second, unix.nano)
+}
+
+// saveRttMeasureTime is invoked by stateify.
+func (r *rcvBufAutoTuneParams) saveRttMeasureTime() unixTime {
+ return unixTime{r.rttMeasureTime.Unix(), r.rttMeasureTime.UnixNano()}
+}
+
+// loadRttMeasureTime is invoked by stateify.
+func (r *rcvBufAutoTuneParams) loadRttMeasureTime(unix unixTime) {
+ r.rttMeasureTime = time.Unix(unix.second, unix.nano)
+}
diff --git a/pkg/tcpip/transport/tcp/forwarder.go b/pkg/tcpip/transport/tcp/forwarder.go
index c30b45c2c..63666f0b3 100644
--- a/pkg/tcpip/transport/tcp/forwarder.go
+++ b/pkg/tcpip/transport/tcp/forwarder.go
@@ -17,12 +17,12 @@ package tcp
import (
"sync"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/seqnum"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// Forwarder is a connection request forwarder, which allows clients to decide
@@ -47,7 +47,7 @@ type Forwarder struct {
// If rcvWnd is set to zero, the default buffer size is used instead.
func NewForwarder(s *stack.Stack, rcvWnd, maxInFlight int, handler func(*ForwarderRequest)) *Forwarder {
if rcvWnd == 0 {
- rcvWnd = DefaultBufferSize
+ rcvWnd = DefaultReceiveBufferSize
}
return &Forwarder{
maxInFlight: maxInFlight,
diff --git a/pkg/tcpip/transport/tcp/protocol.go b/pkg/tcpip/transport/tcp/protocol.go
index b31bcccfa..ee04dcfcc 100644
--- a/pkg/tcpip/transport/tcp/protocol.go
+++ b/pkg/tcpip/transport/tcp/protocol.go
@@ -24,13 +24,13 @@ import (
"strings"
"sync"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/raw"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/seqnum"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/raw"
+ "gvisor.dev/gvisor/pkg/waiter"
)
const (
@@ -41,13 +41,18 @@ const (
ProtocolNumber = header.TCPProtocolNumber
// MinBufferSize is the smallest size of a receive or send buffer.
- minBufferSize = 4 << 10 // 4096 bytes.
+ MinBufferSize = 4 << 10 // 4096 bytes.
- // DefaultBufferSize is the default size of the receive and send buffers.
- DefaultBufferSize = 1 << 20 // 1MB
+ // DefaultSendBufferSize is the default size of the send buffer for
+ // an endpoint.
+ DefaultSendBufferSize = 1 << 20 // 1MB
- // MaxBufferSize is the largest size a receive and send buffer can grow to.
- maxBufferSize = 4 << 20 // 4MB
+ // DefaultReceiveBufferSize is the default size of the receive buffer
+ // for an endpoint.
+ DefaultReceiveBufferSize = 1 << 20 // 1MB
+
+ // MaxBufferSize is the largest size a receive/send buffer can grow to.
+ MaxBufferSize = 4 << 20 // 4MB
// MaxUnprocessedSegments is the maximum number of unprocessed segments
// that can be queued for a given endpoint.
@@ -79,13 +84,6 @@ const (
ccCubic = "cubic"
)
-// CongestionControlOption sets the current congestion control algorithm.
-type CongestionControlOption string
-
-// AvailableCongestionControlOption returns the supported congestion control
-// algorithms.
-type AvailableCongestionControlOption string
-
type protocol struct {
mu sync.Mutex
sackEnabled bool
@@ -93,7 +91,7 @@ type protocol struct {
recvBufferSize ReceiveBufferSizeOption
congestionControl string
availableCongestionControl []string
- allowedCongestionControl []string
+ moderateReceiveBuffer bool
}
// Number returns the tcp protocol number.
@@ -188,7 +186,7 @@ func (p *protocol) SetOption(option interface{}) *tcpip.Error {
p.mu.Unlock()
return nil
- case CongestionControlOption:
+ case tcpip.CongestionControlOption:
for _, c := range p.availableCongestionControl {
if string(v) == c {
p.mu.Lock()
@@ -197,7 +195,16 @@ func (p *protocol) SetOption(option interface{}) *tcpip.Error {
return nil
}
}
- return tcpip.ErrInvalidOptionValue
+ // linux returns ENOENT when an invalid congestion control
+ // is specified.
+ return tcpip.ErrNoSuchFile
+
+ case tcpip.ModerateReceiveBufferOption:
+ p.mu.Lock()
+ p.moderateReceiveBuffer = bool(v)
+ p.mu.Unlock()
+ return nil
+
default:
return tcpip.ErrUnknownProtocolOption
}
@@ -223,16 +230,25 @@ func (p *protocol) Option(option interface{}) *tcpip.Error {
*v = p.recvBufferSize
p.mu.Unlock()
return nil
- case *CongestionControlOption:
+
+ case *tcpip.CongestionControlOption:
p.mu.Lock()
- *v = CongestionControlOption(p.congestionControl)
+ *v = tcpip.CongestionControlOption(p.congestionControl)
p.mu.Unlock()
return nil
- case *AvailableCongestionControlOption:
+
+ case *tcpip.AvailableCongestionControlOption:
p.mu.Lock()
- *v = AvailableCongestionControlOption(strings.Join(p.availableCongestionControl, " "))
+ *v = tcpip.AvailableCongestionControlOption(strings.Join(p.availableCongestionControl, " "))
p.mu.Unlock()
return nil
+
+ case *tcpip.ModerateReceiveBufferOption:
+ p.mu.Lock()
+ *v = tcpip.ModerateReceiveBufferOption(p.moderateReceiveBuffer)
+ p.mu.Unlock()
+ return nil
+
default:
return tcpip.ErrUnknownProtocolOption
}
@@ -241,8 +257,8 @@ func (p *protocol) Option(option interface{}) *tcpip.Error {
func init() {
stack.RegisterTransportProtocolFactory(ProtocolName, func() stack.TransportProtocol {
return &protocol{
- sendBufferSize: SendBufferSizeOption{minBufferSize, DefaultBufferSize, maxBufferSize},
- recvBufferSize: ReceiveBufferSizeOption{minBufferSize, DefaultBufferSize, maxBufferSize},
+ sendBufferSize: SendBufferSizeOption{MinBufferSize, DefaultSendBufferSize, MaxBufferSize},
+ recvBufferSize: ReceiveBufferSizeOption{MinBufferSize, DefaultReceiveBufferSize, MaxBufferSize},
congestionControl: ccReno,
availableCongestionControl: []string{ccReno, ccCubic},
}
diff --git a/pkg/tcpip/transport/tcp/rcv.go b/pkg/tcpip/transport/tcp/rcv.go
index f02fa6105..e90f9a7d9 100644
--- a/pkg/tcpip/transport/tcp/rcv.go
+++ b/pkg/tcpip/transport/tcp/rcv.go
@@ -16,9 +16,10 @@ package tcp
import (
"container/heap"
+ "time"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/seqnum"
)
// receiver holds the state necessary to receive TCP segments and turn them
@@ -38,6 +39,9 @@ type receiver struct {
// shrinking it.
rcvAcc seqnum.Value
+ // rcvWnd is the non-scaled receive window last advertised to the peer.
+ rcvWnd seqnum.Size
+
rcvWndScale uint8
closed bool
@@ -47,13 +51,14 @@ type receiver struct {
pendingBufSize seqnum.Size
}
-func newReceiver(ep *endpoint, irs seqnum.Value, rcvWnd seqnum.Size, rcvWndScale uint8) *receiver {
+func newReceiver(ep *endpoint, irs seqnum.Value, rcvWnd seqnum.Size, rcvWndScale uint8, pendingBufSize seqnum.Size) *receiver {
return &receiver{
ep: ep,
rcvNxt: irs + 1,
rcvAcc: irs.Add(rcvWnd + 1),
+ rcvWnd: rcvWnd,
rcvWndScale: rcvWndScale,
- pendingBufSize: rcvWnd,
+ pendingBufSize: pendingBufSize,
}
}
@@ -72,14 +77,16 @@ func (r *receiver) acceptable(segSeq seqnum.Value, segLen seqnum.Size) bool {
// getSendParams returns the parameters needed by the sender when building
// segments to send.
func (r *receiver) getSendParams() (rcvNxt seqnum.Value, rcvWnd seqnum.Size) {
- // Calculate the window size based on the current buffer size.
- n := r.ep.receiveBufferAvailable()
- acc := r.rcvNxt.Add(seqnum.Size(n))
+ // Calculate the window size based on the available buffer space.
+ receiveBufferAvailable := r.ep.receiveBufferAvailable()
+ acc := r.rcvNxt.Add(seqnum.Size(receiveBufferAvailable))
if r.rcvAcc.LessThan(acc) {
r.rcvAcc = acc
}
-
- return r.rcvNxt, r.rcvNxt.Size(r.rcvAcc) >> r.rcvWndScale
+ // Stash away the non-scaled receive window as we use it for measuring
+ // receiver's estimated RTT.
+ r.rcvWnd = r.rcvNxt.Size(r.rcvAcc)
+ return r.rcvNxt, r.rcvWnd >> r.rcvWndScale
}
// nonZeroWindow is called when the receive window grows from zero to nonzero;
@@ -130,6 +137,21 @@ func (r *receiver) consumeSegment(s *segment, segSeq seqnum.Value, segLen seqnum
// Update the segment that we're expecting to consume.
r.rcvNxt = segSeq.Add(segLen)
+ // In cases of a misbehaving sender which could send more than the
+ // advertised window, we could end up in a situation where we get a
+ // segment that exceeds the window advertised. Instead of partially
+ // accepting the segment and discarding bytes beyond the advertised
+ // window, we accept the whole segment and make sure r.rcvAcc is moved
+ // forward to match r.rcvNxt to indicate that the window is now closed.
+ //
+ // In absence of this check the r.acceptable() check fails and accepts
+ // segments that should be dropped because rcvWnd is calculated as
+ // the size of the interval (rcvNxt, rcvAcc] which becomes extremely
+ // large if rcvAcc is ever less than rcvNxt.
+ if r.rcvAcc.LessThan(r.rcvNxt) {
+ r.rcvAcc = r.rcvNxt
+ }
+
// Trim SACK Blocks to remove any SACK information that covers
// sequence numbers that have been consumed.
TrimSACKBlockList(&r.ep.sack, r.rcvNxt)
@@ -198,6 +220,39 @@ func (r *receiver) consumeSegment(s *segment, segSeq seqnum.Value, segLen seqnum
return true
}
+// updateRTT updates the receiver RTT measurement based on the sequence number
+// of the received segment.
+func (r *receiver) updateRTT() {
+ // From: https://public.lanl.gov/radiant/pubs/drs/sc2001-poster.pdf
+ //
+ // A system that is only transmitting acknowledgements can still
+ // estimate the round-trip time by observing the time between when a byte
+ // is first acknowledged and the receipt of data that is at least one
+ // window beyond the sequence number that was acknowledged.
+ r.ep.rcvListMu.Lock()
+ if r.ep.rcvAutoParams.rttMeasureTime.IsZero() {
+ // New measurement.
+ r.ep.rcvAutoParams.rttMeasureTime = time.Now()
+ r.ep.rcvAutoParams.rttMeasureSeqNumber = r.rcvNxt.Add(r.rcvWnd)
+ r.ep.rcvListMu.Unlock()
+ return
+ }
+ if r.rcvNxt.LessThan(r.ep.rcvAutoParams.rttMeasureSeqNumber) {
+ r.ep.rcvListMu.Unlock()
+ return
+ }
+ rtt := time.Since(r.ep.rcvAutoParams.rttMeasureTime)
+ // We only store the minimum observed RTT here as this is only used in
+ // absence of a SRTT available from either timestamps or a sender
+ // measurement of RTT.
+ if r.ep.rcvAutoParams.rtt == 0 || rtt < r.ep.rcvAutoParams.rtt {
+ r.ep.rcvAutoParams.rtt = rtt
+ }
+ r.ep.rcvAutoParams.rttMeasureTime = time.Now()
+ r.ep.rcvAutoParams.rttMeasureSeqNumber = r.rcvNxt.Add(r.rcvWnd)
+ r.ep.rcvListMu.Unlock()
+}
+
// handleRcvdSegment handles TCP segments directed at the connection managed by
// r as they arrive. It is called by the protocol main loop.
func (r *receiver) handleRcvdSegment(s *segment) {
@@ -226,10 +281,9 @@ func (r *receiver) handleRcvdSegment(s *segment) {
r.pendingBufUsed += s.logicalLen()
s.incRef()
heap.Push(&r.pendingRcvdSegments, s)
+ UpdateSACKBlocks(&r.ep.sack, segSeq, segSeq.Add(segLen), r.rcvNxt)
}
- UpdateSACKBlocks(&r.ep.sack, segSeq, segSeq.Add(segLen), r.rcvNxt)
-
// Immediately send an ack so that the peer knows it may
// have to retransmit.
r.ep.snd.sendAck()
@@ -237,6 +291,12 @@ func (r *receiver) handleRcvdSegment(s *segment) {
return
}
+ // Since we consumed a segment update the receiver's RTT estimate
+ // if required.
+ if segLen > 0 {
+ r.updateRTT()
+ }
+
// By consuming the current segment, we may have filled a gap in the
// sequence number domain that allows pending segments to be consumed
// now. So try to do it.
diff --git a/pkg/tcpip/transport/tcp/sack.go b/pkg/tcpip/transport/tcp/sack.go
index 6a013d99b..7be86d68e 100644
--- a/pkg/tcpip/transport/tcp/sack.go
+++ b/pkg/tcpip/transport/tcp/sack.go
@@ -15,8 +15,8 @@
package tcp
import (
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/seqnum"
)
const (
@@ -31,6 +31,13 @@ const (
// segment identified by segStart->segEnd.
func UpdateSACKBlocks(sack *SACKInfo, segStart seqnum.Value, segEnd seqnum.Value, rcvNxt seqnum.Value) {
newSB := header.SACKBlock{Start: segStart, End: segEnd}
+
+ // Ignore any invalid SACK blocks or blocks that are before rcvNxt as
+ // those bytes have already been acked.
+ if newSB.End.LessThanEq(newSB.Start) || newSB.End.LessThan(rcvNxt) {
+ return
+ }
+
if sack.NumBlocks == 0 {
sack.Blocks[0] = newSB
sack.NumBlocks = 1
@@ -39,9 +46,8 @@ func UpdateSACKBlocks(sack *SACKInfo, segStart seqnum.Value, segEnd seqnum.Value
var n = 0
for i := 0; i < sack.NumBlocks; i++ {
start, end := sack.Blocks[i].Start, sack.Blocks[i].End
- if end.LessThanEq(start) || start.LessThanEq(rcvNxt) {
- // Discard any invalid blocks where end is before start
- // and discard any sack blocks that are before rcvNxt as
+ if end.LessThanEq(rcvNxt) {
+ // Discard any sack blocks that are before rcvNxt as
// those have already been acked.
continue
}
diff --git a/pkg/tcpip/transport/tcp/sack_scoreboard.go b/pkg/tcpip/transport/tcp/sack_scoreboard.go
index 1c5766a42..7ef2df377 100644
--- a/pkg/tcpip/transport/tcp/sack_scoreboard.go
+++ b/pkg/tcpip/transport/tcp/sack_scoreboard.go
@@ -19,8 +19,8 @@ import (
"strings"
"github.com/google/btree"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/seqnum"
)
const (
diff --git a/pkg/tcpip/transport/tcp/sack_scoreboard_test.go b/pkg/tcpip/transport/tcp/sack_scoreboard_test.go
index b59eedc9d..b4e5ba0df 100644
--- a/pkg/tcpip/transport/tcp/sack_scoreboard_test.go
+++ b/pkg/tcpip/transport/tcp/sack_scoreboard_test.go
@@ -17,9 +17,9 @@ package tcp_test
import (
"testing"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/seqnum"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/tcp"
)
const smss = 1500
diff --git a/pkg/tcpip/transport/tcp/segment.go b/pkg/tcpip/transport/tcp/segment.go
index 450d9fbc1..ea725d513 100644
--- a/pkg/tcpip/transport/tcp/segment.go
+++ b/pkg/tcpip/transport/tcp/segment.go
@@ -18,10 +18,10 @@ import (
"sync/atomic"
"time"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/seqnum"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
)
// segment represents a TCP segment. It holds the payload and parsed TCP segment
diff --git a/pkg/tcpip/transport/tcp/segment_state.go b/pkg/tcpip/transport/tcp/segment_state.go
index dd7e14aa6..7dc2741a6 100644
--- a/pkg/tcpip/transport/tcp/segment_state.go
+++ b/pkg/tcpip/transport/tcp/segment_state.go
@@ -17,7 +17,7 @@ package tcp
import (
"time"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
)
// saveData is invoked by stateify.
diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go
index b236d7af2..0fee7ab72 100644
--- a/pkg/tcpip/transport/tcp/snd.go
+++ b/pkg/tcpip/transport/tcp/snd.go
@@ -20,11 +20,11 @@ import (
"sync/atomic"
"time"
- "gvisor.googlesource.com/gvisor/pkg/sleep"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum"
+ "gvisor.dev/gvisor/pkg/sleep"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/seqnum"
)
const (
@@ -121,9 +121,8 @@ type sender struct {
// rtt.srtt, rtt.rttvar, and rto are the "smoothed round-trip time",
// "round-trip time variation" and "retransmit timeout", as defined in
// section 2 of RFC 6298.
- rtt rtt
- rto time.Duration
- srttInited bool
+ rtt rtt
+ rto time.Duration
// maxPayloadSize is the maximum size of the payload of a given segment.
// It is initialized on demand.
@@ -150,8 +149,9 @@ type sender struct {
type rtt struct {
sync.Mutex `state:"nosave"`
- srtt time.Duration
- rttvar time.Duration
+ srtt time.Duration
+ rttvar time.Duration
+ srttInited bool
}
// fastRecovery holds information related to fast recovery from a packet loss.
@@ -194,8 +194,6 @@ func newSender(ep *endpoint, iss, irs seqnum.Value, sndWnd seqnum.Size, mss uint
s := &sender{
ep: ep,
- sndCwnd: InitialCwnd,
- sndSsthresh: math.MaxInt64,
sndWnd: sndWnd,
sndUna: iss + 1,
sndNxt: iss + 1,
@@ -238,7 +236,13 @@ func newSender(ep *endpoint, iss, irs seqnum.Value, sndWnd seqnum.Size, mss uint
return s
}
-func (s *sender) initCongestionControl(congestionControlName CongestionControlOption) congestionControl {
+// initCongestionControl initializes the specified congestion control module and
+// returns a handle to it. It also initializes the sndCwnd and sndSsThresh to
+// their initial values.
+func (s *sender) initCongestionControl(congestionControlName tcpip.CongestionControlOption) congestionControl {
+ s.sndCwnd = InitialCwnd
+ s.sndSsthresh = math.MaxInt64
+
switch congestionControlName {
case ccCubic:
return newCubicCC(s)
@@ -319,10 +323,10 @@ func (s *sender) sendAck() {
// available. This is done in accordance with section 2 of RFC 6298.
func (s *sender) updateRTO(rtt time.Duration) {
s.rtt.Lock()
- if !s.srttInited {
+ if !s.rtt.srttInited {
s.rtt.rttvar = rtt / 2
s.rtt.srtt = rtt
- s.srttInited = true
+ s.rtt.srttInited = true
} else {
diff := s.rtt.srtt - rtt
if diff < 0 {
diff --git a/pkg/tcpip/transport/tcp/tcp_noracedetector_test.go b/pkg/tcpip/transport/tcp/tcp_noracedetector_test.go
index 4d1519860..272bbcdbd 100644
--- a/pkg/tcpip/transport/tcp/tcp_noracedetector_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_noracedetector_test.go
@@ -26,11 +26,11 @@ import (
"testing"
"time"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp/testing/context"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/tcp"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/tcp/testing/context"
)
func TestFastRecovery(t *testing.T) {
diff --git a/pkg/tcpip/transport/tcp/tcp_sack_test.go b/pkg/tcpip/transport/tcp/tcp_sack_test.go
index 025d133be..4e7f1a740 100644
--- a/pkg/tcpip/transport/tcp/tcp_sack_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_sack_test.go
@@ -21,13 +21,13 @@ import (
"testing"
"time"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp/testing/context"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/seqnum"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/tcp"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/tcp/testing/context"
)
// createConnectedWithSACKPermittedOption creates and connects c.ep with the
diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go
index 779ca8b76..915a98047 100644
--- a/pkg/tcpip/transport/tcp/tcp_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_test.go
@@ -21,20 +21,20 @@ import (
"testing"
"time"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/checker"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/loopback"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sniffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv6"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/ports"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp/testing/context"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/checker"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/link/loopback"
+ "gvisor.dev/gvisor/pkg/tcpip/link/sniffer"
+ "gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
+ "gvisor.dev/gvisor/pkg/tcpip/network/ipv6"
+ "gvisor.dev/gvisor/pkg/tcpip/ports"
+ "gvisor.dev/gvisor/pkg/tcpip/seqnum"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/tcp"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/tcp/testing/context"
+ "gvisor.dev/gvisor/pkg/waiter"
)
const (
@@ -1110,8 +1110,9 @@ func TestNonScaledWindowAccept(t *testing.T) {
t.Fatalf("Listen failed: %v", err)
}
- // Do 3-way handshake.
- c.PassiveConnect(100, 2, header.TCPSynOptions{MSS: defaultIPv4MSS})
+ // Do 3-way handshake w/ window scaling disabled. The SYN-ACK to the SYN
+ // should not carry the window scaling option.
+ c.PassiveConnect(100, -1, header.TCPSynOptions{MSS: defaultIPv4MSS})
// Try to accept the connection.
we, ch := waiter.NewChannelEntry(nil)
@@ -1600,7 +1601,6 @@ func TestPassiveSendMSSLessThanMTU(t *testing.T) {
// Set the buffer size to a deterministic size so that we can check the
// window scaling option.
const rcvBufferSize = 0x20000
- const wndScale = 2
if err := ep.SetSockOpt(tcpip.ReceiveBufferSizeOption(rcvBufferSize)); err != nil {
t.Fatalf("SetSockOpt failed failed: %v", err)
}
@@ -1614,7 +1614,7 @@ func TestPassiveSendMSSLessThanMTU(t *testing.T) {
}
// Do 3-way handshake.
- c.PassiveConnect(maxPayload, wndScale, header.TCPSynOptions{MSS: mtu - header.IPv4MinimumSize - header.TCPMinimumSize})
+ c.PassiveConnect(maxPayload, -1, header.TCPSynOptions{MSS: mtu - header.IPv4MinimumSize - header.TCPMinimumSize})
// Try to accept the connection.
we, ch := waiter.NewChannelEntry(nil)
@@ -1713,7 +1713,7 @@ func TestForwarderSendMSSLessThanMTU(t *testing.T) {
s.SetTransportProtocolHandler(tcp.ProtocolNumber, f.HandlePacket)
// Do 3-way handshake.
- c.PassiveConnect(maxPayload, 1, header.TCPSynOptions{MSS: mtu - header.IPv4MinimumSize - header.TCPMinimumSize})
+ c.PassiveConnect(maxPayload, -1, header.TCPSynOptions{MSS: mtu - header.IPv4MinimumSize - header.TCPMinimumSize})
// Wait for connection to be available.
select {
@@ -2767,11 +2767,11 @@ func TestDefaultBufferSizes(t *testing.T) {
}
}()
- checkSendBufferSize(t, ep, tcp.DefaultBufferSize)
- checkRecvBufferSize(t, ep, tcp.DefaultBufferSize)
+ checkSendBufferSize(t, ep, tcp.DefaultSendBufferSize)
+ checkRecvBufferSize(t, ep, tcp.DefaultReceiveBufferSize)
// Change the default send buffer size.
- if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SendBufferSizeOption{1, tcp.DefaultBufferSize * 2, tcp.DefaultBufferSize * 20}); err != nil {
+ if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SendBufferSizeOption{1, tcp.DefaultSendBufferSize * 2, tcp.DefaultSendBufferSize * 20}); err != nil {
t.Fatalf("SetTransportProtocolOption failed: %v", err)
}
@@ -2781,11 +2781,11 @@ func TestDefaultBufferSizes(t *testing.T) {
t.Fatalf("NewEndpoint failed; %v", err)
}
- checkSendBufferSize(t, ep, tcp.DefaultBufferSize*2)
- checkRecvBufferSize(t, ep, tcp.DefaultBufferSize)
+ checkSendBufferSize(t, ep, tcp.DefaultSendBufferSize*2)
+ checkRecvBufferSize(t, ep, tcp.DefaultReceiveBufferSize)
// Change the default receive buffer size.
- if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{1, tcp.DefaultBufferSize * 3, tcp.DefaultBufferSize * 30}); err != nil {
+ if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{1, tcp.DefaultReceiveBufferSize * 3, tcp.DefaultReceiveBufferSize * 30}); err != nil {
t.Fatalf("SetTransportProtocolOption failed: %v", err)
}
@@ -2795,8 +2795,8 @@ func TestDefaultBufferSizes(t *testing.T) {
t.Fatalf("NewEndpoint failed; %v", err)
}
- checkSendBufferSize(t, ep, tcp.DefaultBufferSize*2)
- checkRecvBufferSize(t, ep, tcp.DefaultBufferSize*3)
+ checkSendBufferSize(t, ep, tcp.DefaultSendBufferSize*2)
+ checkRecvBufferSize(t, ep, tcp.DefaultReceiveBufferSize*3)
}
func TestMinMaxBufferSizes(t *testing.T) {
@@ -2810,11 +2810,11 @@ func TestMinMaxBufferSizes(t *testing.T) {
defer ep.Close()
// Change the min/max values for send/receive
- if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{200, tcp.DefaultBufferSize * 2, tcp.DefaultBufferSize * 20}); err != nil {
+ if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{200, tcp.DefaultReceiveBufferSize * 2, tcp.DefaultReceiveBufferSize * 20}); err != nil {
t.Fatalf("SetTransportProtocolOption failed: %v", err)
}
- if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SendBufferSizeOption{300, tcp.DefaultBufferSize * 3, tcp.DefaultBufferSize * 30}); err != nil {
+ if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SendBufferSizeOption{300, tcp.DefaultSendBufferSize * 3, tcp.DefaultSendBufferSize * 30}); err != nil {
t.Fatalf("SetTransportProtocolOption failed: %v", err)
}
@@ -2832,17 +2832,17 @@ func TestMinMaxBufferSizes(t *testing.T) {
checkSendBufferSize(t, ep, 300)
// Set values above the max.
- if err := ep.SetSockOpt(tcpip.ReceiveBufferSizeOption(1 + tcp.DefaultBufferSize*20)); err != nil {
+ if err := ep.SetSockOpt(tcpip.ReceiveBufferSizeOption(1 + tcp.DefaultReceiveBufferSize*20)); err != nil {
t.Fatalf("GetSockOpt failed: %v", err)
}
- checkRecvBufferSize(t, ep, tcp.DefaultBufferSize*20)
+ checkRecvBufferSize(t, ep, tcp.DefaultReceiveBufferSize*20)
- if err := ep.SetSockOpt(tcpip.SendBufferSizeOption(1 + tcp.DefaultBufferSize*30)); err != nil {
+ if err := ep.SetSockOpt(tcpip.SendBufferSizeOption(1 + tcp.DefaultSendBufferSize*30)); err != nil {
t.Fatalf("GetSockOpt failed: %v", err)
}
- checkSendBufferSize(t, ep, tcp.DefaultBufferSize*30)
+ checkSendBufferSize(t, ep, tcp.DefaultSendBufferSize*30)
}
func makeStack() (*stack.Stack, *tcpip.Error) {
@@ -3205,13 +3205,14 @@ func TestTCPEndpointProbe(t *testing.T) {
}
}
-func TestSetCongestionControl(t *testing.T) {
+func TestStackSetCongestionControl(t *testing.T) {
testCases := []struct {
- cc tcp.CongestionControlOption
- mustPass bool
+ cc tcpip.CongestionControlOption
+ err *tcpip.Error
}{
- {"reno", true},
- {"cubic", true},
+ {"reno", nil},
+ {"cubic", nil},
+ {"blahblah", tcpip.ErrNoSuchFile},
}
for _, tc := range testCases {
@@ -3221,62 +3222,135 @@ func TestSetCongestionControl(t *testing.T) {
s := c.Stack()
- if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tc.cc); err != nil && tc.mustPass {
- t.Fatalf("s.SetTransportProtocolOption(%v, %v) = %v, want not-nil", tcp.ProtocolNumber, tc.cc, err)
+ var oldCC tcpip.CongestionControlOption
+ if err := s.TransportProtocolOption(tcp.ProtocolNumber, &oldCC); err != nil {
+ t.Fatalf("s.TransportProtocolOption(%v, %v) = %v", tcp.ProtocolNumber, &oldCC, err)
+ }
+
+ if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tc.cc); err != tc.err {
+ t.Fatalf("s.SetTransportProtocolOption(%v, %v) = %v, want %v", tcp.ProtocolNumber, tc.cc, err, tc.err)
}
- var cc tcp.CongestionControlOption
+ var cc tcpip.CongestionControlOption
if err := s.TransportProtocolOption(tcp.ProtocolNumber, &cc); err != nil {
t.Fatalf("s.TransportProtocolOption(%v, %v) = %v", tcp.ProtocolNumber, &cc, err)
}
- if got, want := cc, tc.cc; got != want {
+
+ got, want := cc, oldCC
+ // If SetTransportProtocolOption is expected to succeed
+ // then the returned value for congestion control should
+ // match the one specified in the
+ // SetTransportProtocolOption call above, else it should
+ // be what it was before the call to
+ // SetTransportProtocolOption.
+ if tc.err == nil {
+ want = tc.cc
+ }
+ if got != want {
t.Fatalf("got congestion control: %v, want: %v", got, want)
}
})
}
}
-func TestAvailableCongestionControl(t *testing.T) {
+func TestStackAvailableCongestionControl(t *testing.T) {
c := context.New(t, 1500)
defer c.Cleanup()
s := c.Stack()
// Query permitted congestion control algorithms.
- var aCC tcp.AvailableCongestionControlOption
+ var aCC tcpip.AvailableCongestionControlOption
if err := s.TransportProtocolOption(tcp.ProtocolNumber, &aCC); err != nil {
t.Fatalf("s.TransportProtocolOption(%v, %v) = %v", tcp.ProtocolNumber, &aCC, err)
}
- if got, want := aCC, tcp.AvailableCongestionControlOption("reno cubic"); got != want {
- t.Fatalf("got tcp.AvailableCongestionControlOption: %v, want: %v", got, want)
+ if got, want := aCC, tcpip.AvailableCongestionControlOption("reno cubic"); got != want {
+ t.Fatalf("got tcpip.AvailableCongestionControlOption: %v, want: %v", got, want)
}
}
-func TestSetAvailableCongestionControl(t *testing.T) {
+func TestStackSetAvailableCongestionControl(t *testing.T) {
c := context.New(t, 1500)
defer c.Cleanup()
s := c.Stack()
// Setting AvailableCongestionControlOption should fail.
- aCC := tcp.AvailableCongestionControlOption("xyz")
+ aCC := tcpip.AvailableCongestionControlOption("xyz")
if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, &aCC); err == nil {
t.Fatalf("s.TransportProtocolOption(%v, %v) = nil, want non-nil", tcp.ProtocolNumber, &aCC)
}
// Verify that we still get the expected list of congestion control options.
- var cc tcp.AvailableCongestionControlOption
+ var cc tcpip.AvailableCongestionControlOption
if err := s.TransportProtocolOption(tcp.ProtocolNumber, &cc); err != nil {
t.Fatalf("s.TransportProtocolOption(%v, %v) = %v", tcp.ProtocolNumber, &cc, err)
}
- if got, want := cc, tcp.AvailableCongestionControlOption("reno cubic"); got != want {
- t.Fatalf("got tcp.AvailableCongestionControlOption: %v, want: %v", got, want)
+ if got, want := cc, tcpip.AvailableCongestionControlOption("reno cubic"); got != want {
+ t.Fatalf("got tcpip.AvailableCongestionControlOption: %v, want: %v", got, want)
+ }
+}
+
+func TestEndpointSetCongestionControl(t *testing.T) {
+ testCases := []struct {
+ cc tcpip.CongestionControlOption
+ err *tcpip.Error
+ }{
+ {"reno", nil},
+ {"cubic", nil},
+ {"blahblah", tcpip.ErrNoSuchFile},
+ }
+
+ for _, connected := range []bool{false, true} {
+ for _, tc := range testCases {
+ t.Run(fmt.Sprintf("SetSockOpt(.., %v) w/ connected = %v", tc.cc, connected), func(t *testing.T) {
+ c := context.New(t, 1500)
+ defer c.Cleanup()
+
+ // Create TCP endpoint.
+ var err *tcpip.Error
+ c.EP, err = c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &c.WQ)
+ if err != nil {
+ t.Fatalf("NewEndpoint failed: %v", err)
+ }
+
+ var oldCC tcpip.CongestionControlOption
+ if err := c.EP.GetSockOpt(&oldCC); err != nil {
+ t.Fatalf("c.EP.SockOpt(%v) = %v", &oldCC, err)
+ }
+
+ if connected {
+ c.Connect(789 /* iss */, 32768 /* rcvWnd */, nil)
+ }
+
+ if err := c.EP.SetSockOpt(tc.cc); err != tc.err {
+ t.Fatalf("c.EP.SetSockOpt(%v) = %v, want %v", tc.cc, err, tc.err)
+ }
+
+ var cc tcpip.CongestionControlOption
+ if err := c.EP.GetSockOpt(&cc); err != nil {
+ t.Fatalf("c.EP.SockOpt(%v) = %v", &cc, err)
+ }
+
+ got, want := cc, oldCC
+ // If SetSockOpt is expected to succeed then the
+ // returned value for congestion control should match
+ // the one specified in the SetSockOpt above, else it
+ // should be what it was before the call to SetSockOpt.
+ if tc.err == nil {
+ want = tc.cc
+ }
+ if got != want {
+ t.Fatalf("got congestion control: %v, want: %v", got, want)
+ }
+ })
+ }
}
}
func enableCUBIC(t *testing.T, c *context.Context) {
t.Helper()
- opt := tcp.CongestionControlOption("cubic")
+ opt := tcpip.CongestionControlOption("cubic")
if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, opt); err != nil {
t.Fatalf("c.s.SetTransportProtocolOption(tcp.ProtocolNumber, %v = %v", opt, err)
}
@@ -3902,3 +3976,273 @@ func TestEndpointBindListenAcceptState(t *testing.T) {
}
}
+
+// This test verifies that the auto tuning does not grow the receive buffer if
+// the application is not reading the data actively.
+func TestReceiveBufferAutoTuningApplicationLimited(t *testing.T) {
+ const mtu = 1500
+ const mss = mtu - header.IPv4MinimumSize - header.TCPMinimumSize
+
+ c := context.New(t, mtu)
+ defer c.Cleanup()
+
+ stk := c.Stack()
+ // Set lower limits for auto-tuning tests. This is required because the
+ // test stops the worker which can cause packets to be dropped because
+ // the segment queue holding unprocessed packets is limited to 500.
+ const receiveBufferSize = 80 << 10 // 80KB.
+ const maxReceiveBufferSize = receiveBufferSize * 10
+ if err := stk.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{1, receiveBufferSize, maxReceiveBufferSize}); err != nil {
+ t.Fatalf("SetTransportProtocolOption failed: %v", err)
+ }
+
+ // Enable auto-tuning.
+ if err := stk.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(true)); err != nil {
+ t.Fatalf("SetTransportProtocolOption failed: %v", err)
+ }
+ // Change the expected window scale to match the value needed for the
+ // maximum buffer size defined above.
+ c.WindowScale = uint8(tcp.FindWndScale(maxReceiveBufferSize))
+
+ rawEP := c.CreateConnectedWithOptions(header.TCPSynOptions{TS: true, WS: 4})
+
+ // NOTE: The timestamp values in the sent packets are meaningless to the
+ // peer so we just increment the timestamp value by 1 every batch as we
+ // are not really using them for anything. Send a single byte to verify
+ // the advertised window.
+ tsVal := rawEP.TSVal + 1
+
+ // Introduce a 25ms latency by delaying the first byte.
+ latency := 25 * time.Millisecond
+ time.Sleep(latency)
+ rawEP.SendPacketWithTS([]byte{1}, tsVal)
+
+ // Verify that the ACK has the expected window.
+ wantRcvWnd := receiveBufferSize
+ wantRcvWnd = (wantRcvWnd >> uint32(c.WindowScale))
+ rawEP.VerifyACKRcvWnd(uint16(wantRcvWnd - 1))
+ time.Sleep(25 * time.Millisecond)
+
+ // Allocate a large enough payload for the test.
+ b := make([]byte, int(receiveBufferSize)*2)
+ offset := 0
+ payloadSize := receiveBufferSize - 1
+ worker := (c.EP).(interface {
+ StopWork()
+ ResumeWork()
+ })
+ tsVal++
+
+ // Stop the worker goroutine.
+ worker.StopWork()
+ start := offset
+ end := offset + payloadSize
+ packetsSent := 0
+ for ; start < end; start += mss {
+ rawEP.SendPacketWithTS(b[start:start+mss], tsVal)
+ packetsSent++
+ }
+ // Resume the worker so that it only sees the packets once all of them
+ // are waiting to be read.
+ worker.ResumeWork()
+
+ // Since we read no bytes the window should goto zero till the
+ // application reads some of the data.
+ // Discard all intermediate acks except the last one.
+ if packetsSent > 100 {
+ for i := 0; i < (packetsSent / 100); i++ {
+ _ = c.GetPacket()
+ }
+ }
+ rawEP.VerifyACKRcvWnd(0)
+
+ time.Sleep(25 * time.Millisecond)
+ // Verify that sending more data when window is closed is dropped and
+ // not acked.
+ rawEP.SendPacketWithTS(b[start:start+mss], tsVal)
+
+ // Verify that the stack sends us back an ACK with the sequence number
+ // of the last packet sent indicating it was dropped.
+ p := c.GetPacket()
+ checker.IPv4(t, p, checker.TCP(
+ checker.AckNum(uint32(rawEP.NextSeqNum)-uint32(mss)),
+ checker.Window(0),
+ ))
+
+ // Now read all the data from the endpoint and verify that advertised
+ // window increases to the full available buffer size.
+ for {
+ _, _, err := c.EP.Read(nil)
+ if err == tcpip.ErrWouldBlock {
+ break
+ }
+ }
+
+ // Verify that we receive a non-zero window update ACK. When running
+ // under thread santizer this test can end up sending more than 1
+ // ack, 1 for the non-zero window
+ p = c.GetPacket()
+ checker.IPv4(t, p, checker.TCP(
+ checker.AckNum(uint32(rawEP.NextSeqNum)-uint32(mss)),
+ func(t *testing.T, h header.Transport) {
+ tcp, ok := h.(header.TCP)
+ if !ok {
+ return
+ }
+ if w := tcp.WindowSize(); w == 0 || w > uint16(wantRcvWnd) {
+ t.Errorf("expected a non-zero window: got %d, want <= wantRcvWnd", w, wantRcvWnd)
+ }
+ },
+ ))
+}
+
+// This test verifies that the auto tuning does not grow the receive buffer if
+// the application is not reading the data actively.
+func TestReceiveBufferAutoTuning(t *testing.T) {
+ const mtu = 1500
+ const mss = mtu - header.IPv4MinimumSize - header.TCPMinimumSize
+
+ c := context.New(t, mtu)
+ defer c.Cleanup()
+
+ // Enable Auto-tuning.
+ stk := c.Stack()
+ // Set lower limits for auto-tuning tests. This is required because the
+ // test stops the worker which can cause packets to be dropped because
+ // the segment queue holding unprocessed packets is limited to 500.
+ const receiveBufferSize = 80 << 10 // 80KB.
+ const maxReceiveBufferSize = receiveBufferSize * 10
+ if err := stk.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{1, receiveBufferSize, maxReceiveBufferSize}); err != nil {
+ t.Fatalf("SetTransportProtocolOption failed: %v", err)
+ }
+
+ // Enable auto-tuning.
+ if err := stk.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(true)); err != nil {
+ t.Fatalf("SetTransportProtocolOption failed: %v", err)
+ }
+ // Change the expected window scale to match the value needed for the
+ // maximum buffer size used by stack.
+ c.WindowScale = uint8(tcp.FindWndScale(maxReceiveBufferSize))
+
+ rawEP := c.CreateConnectedWithOptions(header.TCPSynOptions{TS: true, WS: 4})
+
+ wantRcvWnd := receiveBufferSize
+ scaleRcvWnd := func(rcvWnd int) uint16 {
+ return uint16(rcvWnd >> uint16(c.WindowScale))
+ }
+ // Allocate a large array to send to the endpoint.
+ b := make([]byte, receiveBufferSize*48)
+
+ // In every iteration we will send double the number of bytes sent in
+ // the previous iteration and read the same from the app. The received
+ // window should grow by at least 2x of bytes read by the app in every
+ // RTT.
+ offset := 0
+ payloadSize := receiveBufferSize / 8
+ worker := (c.EP).(interface {
+ StopWork()
+ ResumeWork()
+ })
+ tsVal := rawEP.TSVal
+ // We are going to do our own computation of what the moderated receive
+ // buffer should be based on sent/copied data per RTT and verify that
+ // the advertised window by the stack matches our calculations.
+ prevCopied := 0
+ done := false
+ latency := 1 * time.Millisecond
+ for i := 0; !done; i++ {
+ tsVal++
+
+ // Stop the worker goroutine.
+ worker.StopWork()
+ start := offset
+ end := offset + payloadSize
+ totalSent := 0
+ packetsSent := 0
+ for ; start < end; start += mss {
+ rawEP.SendPacketWithTS(b[start:start+mss], tsVal)
+ totalSent += mss
+ packetsSent++
+ }
+ // Resume it so that it only sees the packets once all of them
+ // are waiting to be read.
+ worker.ResumeWork()
+
+ // Give 1ms for the worker to process the packets.
+ time.Sleep(1 * time.Millisecond)
+
+ // Verify that the advertised window on the ACK is reduced by
+ // the total bytes sent.
+ expectedWnd := wantRcvWnd - totalSent
+ if packetsSent > 100 {
+ for i := 0; i < (packetsSent / 100); i++ {
+ _ = c.GetPacket()
+ }
+ }
+ rawEP.VerifyACKRcvWnd(scaleRcvWnd(expectedWnd))
+
+ // Now read all the data from the endpoint and invoke the
+ // moderation API to allow for receive buffer auto-tuning
+ // to happen before we measure the new window.
+ totalCopied := 0
+ for {
+ b, _, err := c.EP.Read(nil)
+ if err == tcpip.ErrWouldBlock {
+ break
+ }
+ totalCopied += len(b)
+ }
+
+ // Invoke the moderation API. This is required for auto-tuning
+ // to happen. This method is normally expected to be invoked
+ // from a higher layer than tcpip.Endpoint. So we simulate
+ // copying to user-space by invoking it explicitly here.
+ c.EP.ModerateRecvBuf(totalCopied)
+
+ // Now send a keep-alive packet to trigger an ACK so that we can
+ // measure the new window.
+ rawEP.NextSeqNum--
+ rawEP.SendPacketWithTS(nil, tsVal)
+ rawEP.NextSeqNum++
+
+ if i == 0 {
+ // In the first iteration the receiver based RTT is not
+ // yet known as a result the moderation code should not
+ // increase the advertised window.
+ rawEP.VerifyACKRcvWnd(scaleRcvWnd(wantRcvWnd))
+ prevCopied = totalCopied
+ } else {
+ rttCopied := totalCopied
+ if i == 1 {
+ // The moderation code accumulates copied bytes till
+ // RTT is established. So add in the bytes sent in
+ // the first iteration to the total bytes for this
+ // RTT.
+ rttCopied += prevCopied
+ // Now reset it to the initial value used by the
+ // auto tuning logic.
+ prevCopied = tcp.InitialCwnd * mss * 2
+ }
+ newWnd := rttCopied<<1 + 16*mss
+ grow := (newWnd * (rttCopied - prevCopied)) / prevCopied
+ newWnd += (grow << 1)
+ if newWnd > maxReceiveBufferSize {
+ newWnd = maxReceiveBufferSize
+ done = true
+ }
+ rawEP.VerifyACKRcvWnd(scaleRcvWnd(newWnd))
+ wantRcvWnd = newWnd
+ prevCopied = rttCopied
+ // Increase the latency after first two iterations to
+ // establish a low RTT value in the receiver since it
+ // only tracks the lowest value. This ensures that when
+ // ModerateRcvBuf is called the elapsed time is always >
+ // rtt. Without this the test is flaky due to delays due
+ // to scheduling/wakeup etc.
+ latency += 50 * time.Millisecond
+ }
+ time.Sleep(latency)
+ offset += payloadSize
+ payloadSize *= 2
+ }
+}
diff --git a/pkg/tcpip/transport/tcp/tcp_timestamp_test.go b/pkg/tcpip/transport/tcp/tcp_timestamp_test.go
index 039bbcfba..a641e953d 100644
--- a/pkg/tcpip/transport/tcp/tcp_timestamp_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_timestamp_test.go
@@ -20,13 +20,13 @@ import (
"testing"
"time"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/checker"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp/testing/context"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/checker"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/tcp"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/tcp/testing/context"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// createConnectedWithTimestampOption creates and connects c.ep with the
@@ -182,7 +182,7 @@ func TestTimeStampEnabledAccept(t *testing.T) {
wndSize uint16
}{
{true, -1, 0xffff}, // When cookie is used window scaling is disabled.
- {false, 5, 0x8000}, // 0x8000 * 2^5 = 1<<20 = 1MB window (the default).
+ {false, 5, 0x8000}, // DefaultReceiveBufferSize is 1MB >> 5.
}
for _, tc := range testCases {
timeStampEnabledAccept(t, tc.cookieEnabled, tc.wndScale, tc.wndSize)
@@ -239,7 +239,7 @@ func TestTimeStampDisabledAccept(t *testing.T) {
wndSize uint16
}{
{true, -1, 0xffff}, // When cookie is used window scaling is disabled.
- {false, 5, 0x8000}, // 0x8000 * 2^5 = 1<<20 = 1MB window (the default).
+ {false, 5, 0x8000}, // DefaultReceiveBufferSize is 1MB >> 5.
}
for _, tc := range testCases {
timeStampDisabledAccept(t, tc.cookieEnabled, tc.wndScale, tc.wndSize)
diff --git a/pkg/tcpip/transport/tcp/testing/context/BUILD b/pkg/tcpip/transport/tcp/testing/context/BUILD
index 1584e4095..19b0d31c5 100644
--- a/pkg/tcpip/transport/tcp/testing/context/BUILD
+++ b/pkg/tcpip/transport/tcp/testing/context/BUILD
@@ -6,7 +6,7 @@ go_library(
name = "context",
testonly = 1,
srcs = ["context.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp/testing/context",
+ importpath = "gvisor.dev/gvisor/pkg/tcpip/transport/tcp/testing/context",
visibility = [
"//:sandbox",
],
diff --git a/pkg/tcpip/transport/tcp/testing/context/context.go b/pkg/tcpip/transport/tcp/testing/context/context.go
index 69a43b6f4..630dd7925 100644
--- a/pkg/tcpip/transport/tcp/testing/context/context.go
+++ b/pkg/tcpip/transport/tcp/testing/context/context.go
@@ -21,18 +21,18 @@ import (
"testing"
"time"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/checker"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/channel"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sniffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv6"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/checker"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/link/channel"
+ "gvisor.dev/gvisor/pkg/tcpip/link/sniffer"
+ "gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
+ "gvisor.dev/gvisor/pkg/tcpip/network/ipv6"
+ "gvisor.dev/gvisor/pkg/tcpip/seqnum"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/tcp"
+ "gvisor.dev/gvisor/pkg/waiter"
)
const (
@@ -72,12 +72,6 @@ const (
testInitialSequenceNumber = 789
)
-// defaultWindowScale value specified here depends on the tcp.DefaultBufferSize
-// constant defined in the tcp/endpoint.go because the tcp.DefaultBufferSize is
-// used in tcp.newHandshake to determine the window scale to use when sending a
-// SYN/SYN-ACK.
-var defaultWindowScale = tcp.FindWndScale(tcp.DefaultBufferSize)
-
// Headers is used to represent the TCP header fields when building a
// new packet.
type Headers struct {
@@ -134,6 +128,10 @@ type Context struct {
// TimeStampEnabled is true if ep is connected with the timestamp option
// enabled.
TimeStampEnabled bool
+
+ // WindowScale is the expected window scale in SYN packets sent by
+ // the stack.
+ WindowScale uint8
}
// New allocates and initializes a test context containing a new
@@ -142,11 +140,11 @@ func New(t *testing.T, mtu uint32) *Context {
s := stack.New([]string{ipv4.ProtocolName, ipv6.ProtocolName}, []string{tcp.ProtocolName}, stack.Options{})
// Allow minimum send/receive buffer sizes to be 1 during tests.
- if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SendBufferSizeOption{1, tcp.DefaultBufferSize, tcp.DefaultBufferSize * 10}); err != nil {
+ if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SendBufferSizeOption{1, tcp.DefaultSendBufferSize, 10 * tcp.DefaultSendBufferSize}); err != nil {
t.Fatalf("SetTransportProtocolOption failed: %v", err)
}
- if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{1, tcp.DefaultBufferSize, tcp.DefaultBufferSize * 10}); err != nil {
+ if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{1, tcp.DefaultReceiveBufferSize, 10 * tcp.DefaultReceiveBufferSize}); err != nil {
t.Fatalf("SetTransportProtocolOption failed: %v", err)
}
@@ -184,9 +182,10 @@ func New(t *testing.T, mtu uint32) *Context {
})
return &Context{
- t: t,
- s: s,
- linkEP: linkEP,
+ t: t,
+ s: s,
+ linkEP: linkEP,
+ WindowScale: uint8(tcp.FindWndScale(tcp.DefaultReceiveBufferSize)),
}
}
@@ -520,35 +519,21 @@ func (c *Context) CreateConnected(iss seqnum.Value, rcvWnd seqnum.Size, epRcvBuf
c.CreateConnectedWithRawOptions(iss, rcvWnd, epRcvBuf, nil)
}
-// CreateConnectedWithRawOptions creates a connected TCP endpoint and sends
-// the specified option bytes as the Option field in the initial SYN packet.
+// Connect performs the 3-way handshake for c.EP with the provided Initial
+// Sequence Number (iss) and receive window(rcvWnd) and any options if
+// specified.
//
// It also sets the receive buffer for the endpoint to the specified
// value in epRcvBuf.
-func (c *Context) CreateConnectedWithRawOptions(iss seqnum.Value, rcvWnd seqnum.Size, epRcvBuf *tcpip.ReceiveBufferSizeOption, options []byte) {
- // Create TCP endpoint.
- var err *tcpip.Error
- c.EP, err = c.s.NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &c.WQ)
- if err != nil {
- c.t.Fatalf("NewEndpoint failed: %v", err)
- }
- if got, want := tcp.EndpointState(c.EP.State()), tcp.StateInitial; got != want {
- c.t.Errorf("Unexpected endpoint state: want %v, got %v", want, got)
- }
-
- if epRcvBuf != nil {
- if err := c.EP.SetSockOpt(*epRcvBuf); err != nil {
- c.t.Fatalf("SetSockOpt failed failed: %v", err)
- }
- }
-
+//
+// PreCondition: c.EP must already be created.
+func (c *Context) Connect(iss seqnum.Value, rcvWnd seqnum.Size, options []byte) {
// Start connection attempt.
waitEntry, notifyCh := waiter.NewChannelEntry(nil)
c.WQ.EventRegister(&waitEntry, waiter.EventOut)
defer c.WQ.EventUnregister(&waitEntry)
- err = c.EP.Connect(tcpip.FullAddress{Addr: TestAddr, Port: TestPort})
- if err != tcpip.ErrConnectStarted {
+ if err := c.EP.Connect(tcpip.FullAddress{Addr: TestAddr, Port: TestPort}); err != tcpip.ErrConnectStarted {
c.t.Fatalf("Unexpected return value from Connect: %v", err)
}
@@ -590,8 +575,7 @@ func (c *Context) CreateConnectedWithRawOptions(iss seqnum.Value, rcvWnd seqnum.
// Wait for connection to be established.
select {
case <-notifyCh:
- err = c.EP.GetSockOpt(tcpip.ErrorOption{})
- if err != nil {
+ if err := c.EP.GetSockOpt(tcpip.ErrorOption{}); err != nil {
c.t.Fatalf("Unexpected error when connecting: %v", err)
}
case <-time.After(1 * time.Second):
@@ -604,6 +588,27 @@ func (c *Context) CreateConnectedWithRawOptions(iss seqnum.Value, rcvWnd seqnum.
c.Port = tcpHdr.SourcePort()
}
+// CreateConnectedWithRawOptions creates a connected TCP endpoint and sends
+// the specified option bytes as the Option field in the initial SYN packet.
+//
+// It also sets the receive buffer for the endpoint to the specified
+// value in epRcvBuf.
+func (c *Context) CreateConnectedWithRawOptions(iss seqnum.Value, rcvWnd seqnum.Size, epRcvBuf *tcpip.ReceiveBufferSizeOption, options []byte) {
+ // Create TCP endpoint.
+ var err *tcpip.Error
+ c.EP, err = c.s.NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &c.WQ)
+ if err != nil {
+ c.t.Fatalf("NewEndpoint failed: %v", err)
+ }
+
+ if epRcvBuf != nil {
+ if err := c.EP.SetSockOpt(*epRcvBuf); err != nil {
+ c.t.Fatalf("SetSockOpt failed failed: %v", err)
+ }
+ }
+ c.Connect(iss, rcvWnd, options)
+}
+
// RawEndpoint is just a small wrapper around a TCP endpoint's state to make
// sending data and ACK packets easy while being able to manipulate the sequence
// numbers and timestamp values as needed.
@@ -666,6 +671,21 @@ func (r *RawEndpoint) VerifyACKWithTS(tsVal uint32) {
r.RecentTS = opts.TSVal
}
+// VerifyACKRcvWnd verifies that the window advertised by the incoming ACK
+// matches the provided rcvWnd.
+func (r *RawEndpoint) VerifyACKRcvWnd(rcvWnd uint16) {
+ ackPacket := r.C.GetPacket()
+ checker.IPv4(r.C.t, ackPacket,
+ checker.TCP(
+ checker.DstPort(r.SrcPort),
+ checker.TCPFlags(header.TCPFlagAck),
+ checker.SeqNum(uint32(r.AckNum)),
+ checker.AckNum(uint32(r.NextSeqNum)),
+ checker.Window(rcvWnd),
+ ),
+ )
+}
+
// VerifyACKNoSACK verifies that the ACK does not contain a SACK block.
func (r *RawEndpoint) VerifyACKNoSACK() {
r.VerifyACKHasSACK(nil)
@@ -726,7 +746,7 @@ func (c *Context) CreateConnectedWithOptions(wantOptions header.TCPSynOptions) *
checker.TCPSynOptions(header.TCPSynOptions{
MSS: mss,
TS: true,
- WS: defaultWindowScale,
+ WS: int(c.WindowScale),
SACKPermitted: c.SACKEnabled(),
}),
),
@@ -741,6 +761,9 @@ func (c *Context) CreateConnectedWithOptions(wantOptions header.TCPSynOptions) *
// Build options w/ tsVal to be sent in the SYN-ACK.
synAckOptions := make([]byte, header.TCPOptionsMaximumSize)
offset := 0
+ if wantOptions.WS != -1 {
+ offset += header.EncodeWSOption(wantOptions.WS, synAckOptions[offset:])
+ }
if wantOptions.TS {
offset += header.EncodeTSOption(wantOptions.TSVal, synOptions.TSVal, synAckOptions[offset:])
}
diff --git a/pkg/tcpip/transport/tcp/timer.go b/pkg/tcpip/transport/tcp/timer.go
index fc1c7cbd2..c70525f27 100644
--- a/pkg/tcpip/transport/tcp/timer.go
+++ b/pkg/tcpip/transport/tcp/timer.go
@@ -17,7 +17,7 @@ package tcp
import (
"time"
- "gvisor.googlesource.com/gvisor/pkg/sleep"
+ "gvisor.dev/gvisor/pkg/sleep"
)
type timerState int
diff --git a/pkg/tcpip/transport/tcpconntrack/BUILD b/pkg/tcpip/transport/tcpconntrack/BUILD
index 31a845dee..4bec48c0f 100644
--- a/pkg/tcpip/transport/tcpconntrack/BUILD
+++ b/pkg/tcpip/transport/tcpconntrack/BUILD
@@ -5,7 +5,7 @@ package(licenses = ["notice"])
go_library(
name = "tcpconntrack",
srcs = ["tcp_conntrack.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcpconntrack",
+ importpath = "gvisor.dev/gvisor/pkg/tcpip/transport/tcpconntrack",
visibility = ["//visibility:public"],
deps = [
"//pkg/tcpip/header",
diff --git a/pkg/tcpip/transport/tcpconntrack/tcp_conntrack.go b/pkg/tcpip/transport/tcpconntrack/tcp_conntrack.go
index f1dcd36d5..93712cd45 100644
--- a/pkg/tcpip/transport/tcpconntrack/tcp_conntrack.go
+++ b/pkg/tcpip/transport/tcpconntrack/tcp_conntrack.go
@@ -18,8 +18,8 @@
package tcpconntrack
import (
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/seqnum"
)
// Result is returned when the state of a TCB is updated in response to an
diff --git a/pkg/tcpip/transport/tcpconntrack/tcp_conntrack_test.go b/pkg/tcpip/transport/tcpconntrack/tcp_conntrack_test.go
index 435e136de..5e271b7ca 100644
--- a/pkg/tcpip/transport/tcpconntrack/tcp_conntrack_test.go
+++ b/pkg/tcpip/transport/tcpconntrack/tcp_conntrack_test.go
@@ -17,8 +17,8 @@ package tcpconntrack_test
import (
"testing"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcpconntrack"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/tcpconntrack"
)
// connected creates a connection tracker TCB and sets it to a connected state
diff --git a/pkg/tcpip/transport/udp/BUILD b/pkg/tcpip/transport/udp/BUILD
index b9520d6e0..6dac66b50 100644
--- a/pkg/tcpip/transport/udp/BUILD
+++ b/pkg/tcpip/transport/udp/BUILD
@@ -24,8 +24,8 @@ go_library(
"protocol.go",
"udp_packet_list.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/udp",
- imports = ["gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"],
+ importpath = "gvisor.dev/gvisor/pkg/tcpip/transport/udp",
+ imports = ["gvisor.dev/gvisor/pkg/tcpip/buffer"],
visibility = ["//visibility:public"],
deps = [
"//pkg/sleep",
diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go
index fa7278286..cb0ea83a6 100644
--- a/pkg/tcpip/transport/udp/endpoint.go
+++ b/pkg/tcpip/transport/udp/endpoint.go
@@ -18,11 +18,11 @@ import (
"math"
"sync"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// +stateify savable
@@ -169,6 +169,9 @@ func (e *endpoint) Close() {
e.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.EventIn | waiter.EventOut)
}
+// ModerateRecvBuf implements tcpip.Endpoint.ModerateRecvBuf.
+func (e *endpoint) ModerateRecvBuf(copied int) {}
+
// Read reads data from the endpoint. This method does not block if
// there is no data pending.
func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) {
@@ -695,8 +698,44 @@ func (e *endpoint) checkV4Mapped(addr *tcpip.FullAddress, allowMismatch bool) (t
return netProto, nil
}
+func (e *endpoint) disconnect() *tcpip.Error {
+ e.mu.Lock()
+ defer e.mu.Unlock()
+
+ if e.state != stateConnected {
+ return nil
+ }
+ id := stack.TransportEndpointID{}
+ // Exclude ephemerally bound endpoints.
+ if e.bindNICID != 0 || e.id.LocalAddress == "" {
+ var err *tcpip.Error
+ id = stack.TransportEndpointID{
+ LocalPort: e.id.LocalPort,
+ LocalAddress: e.id.LocalAddress,
+ }
+ id, err = e.registerWithStack(e.regNICID, e.effectiveNetProtos, id)
+ if err != nil {
+ return err
+ }
+ e.state = stateBound
+ } else {
+ e.state = stateInitial
+ }
+
+ e.stack.UnregisterTransportEndpoint(e.regNICID, e.effectiveNetProtos, ProtocolNumber, e.id, e)
+ e.id = id
+ e.route.Release()
+ e.route = stack.Route{}
+ e.dstPort = 0
+
+ return nil
+}
+
// Connect connects the endpoint to its peer. Specifying a NIC is optional.
func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error {
+ if addr.Addr == "" {
+ return e.disconnect()
+ }
if addr.Port == 0 {
// We don't support connecting to port zero.
return tcpip.ErrInvalidEndpointState
@@ -731,12 +770,16 @@ func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error {
defer r.Release()
id := stack.TransportEndpointID{
- LocalAddress: r.LocalAddress,
+ LocalAddress: e.id.LocalAddress,
LocalPort: localPort,
RemotePort: addr.Port,
RemoteAddress: r.RemoteAddress,
}
+ if e.state == stateInitial {
+ id.LocalAddress = r.LocalAddress
+ }
+
// Even if we're connected, this endpoint can still be used to send
// packets on a different network protocol, so we register both even if
// v6only is set to false and this is an ipv6 endpoint.
diff --git a/pkg/tcpip/transport/udp/endpoint_state.go b/pkg/tcpip/transport/udp/endpoint_state.go
index 74e8e9fd5..18e786397 100644
--- a/pkg/tcpip/transport/udp/endpoint_state.go
+++ b/pkg/tcpip/transport/udp/endpoint_state.go
@@ -15,10 +15,10 @@
package udp
import (
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
)
// saveData saves udpPacket.data field.
@@ -92,8 +92,6 @@ func (e *endpoint) afterLoad() {
if err != nil {
panic(*err)
}
-
- e.id.LocalAddress = e.route.LocalAddress
} else if len(e.id.LocalAddress) != 0 { // stateBound
if e.stack.CheckLocalAddress(e.regNICID, netProto, e.id.LocalAddress) == 0 {
panic(tcpip.ErrBadLocalAddress)
diff --git a/pkg/tcpip/transport/udp/forwarder.go b/pkg/tcpip/transport/udp/forwarder.go
index 25bdd2929..a874fc9fd 100644
--- a/pkg/tcpip/transport/udp/forwarder.go
+++ b/pkg/tcpip/transport/udp/forwarder.go
@@ -15,10 +15,10 @@
package udp
import (
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/waiter"
)
// Forwarder is a session request forwarder, which allows clients to decide
diff --git a/pkg/tcpip/transport/udp/protocol.go b/pkg/tcpip/transport/udp/protocol.go
index 3d31dfbf1..f76e7fbe1 100644
--- a/pkg/tcpip/transport/udp/protocol.go
+++ b/pkg/tcpip/transport/udp/protocol.go
@@ -21,12 +21,12 @@
package udp
import (
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/raw"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/raw"
+ "gvisor.dev/gvisor/pkg/waiter"
)
const (
diff --git a/pkg/tcpip/transport/udp/udp_test.go b/pkg/tcpip/transport/udp/udp_test.go
index 86a8fa19b..75129a2ff 100644
--- a/pkg/tcpip/transport/udp/udp_test.go
+++ b/pkg/tcpip/transport/udp/udp_test.go
@@ -21,17 +21,17 @@ import (
"testing"
"time"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/checker"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/channel"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sniffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv6"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/udp"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/checker"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/link/channel"
+ "gvisor.dev/gvisor/pkg/tcpip/link/sniffer"
+ "gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
+ "gvisor.dev/gvisor/pkg/tcpip/network/ipv6"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/udp"
+ "gvisor.dev/gvisor/pkg/waiter"
)
const (
diff --git a/pkg/tmutex/BUILD b/pkg/tmutex/BUILD
index 69035044d..98d51cc69 100644
--- a/pkg/tmutex/BUILD
+++ b/pkg/tmutex/BUILD
@@ -5,7 +5,7 @@ package(licenses = ["notice"])
go_library(
name = "tmutex",
srcs = ["tmutex.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/tmutex",
+ importpath = "gvisor.dev/gvisor/pkg/tmutex",
visibility = ["//:sandbox"],
)
diff --git a/pkg/unet/BUILD b/pkg/unet/BUILD
index 5e177e78e..769509e80 100644
--- a/pkg/unet/BUILD
+++ b/pkg/unet/BUILD
@@ -8,7 +8,7 @@ go_library(
"unet.go",
"unet_unsafe.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/unet",
+ importpath = "gvisor.dev/gvisor/pkg/unet",
visibility = ["//visibility:public"],
deps = [
"//pkg/abi/linux",
diff --git a/pkg/unet/unet.go b/pkg/unet/unet.go
index 2aa1af4ff..d843f19cf 100644
--- a/pkg/unet/unet.go
+++ b/pkg/unet/unet.go
@@ -23,7 +23,7 @@ import (
"sync/atomic"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/gate"
+ "gvisor.dev/gvisor/pkg/gate"
)
// backlog is used for the listen request.
diff --git a/pkg/unet/unet_test.go b/pkg/unet/unet_test.go
index 763b23c7c..a3cc6f5d3 100644
--- a/pkg/unet/unet_test.go
+++ b/pkg/unet/unet_test.go
@@ -691,3 +691,45 @@ func TestControlMessage(t *testing.T) {
}
}
}
+
+func benchmarkSendRecv(b *testing.B, packet bool) {
+ server, client, err := SocketPair(packet)
+ if err != nil {
+ b.Fatalf("SocketPair: got %v, wanted nil", err)
+ }
+ defer server.Close()
+ defer client.Close()
+ go func() {
+ buf := make([]byte, 1)
+ for i := 0; i < b.N; i++ {
+ n, err := server.Read(buf)
+ if n != 1 || err != nil {
+ b.Fatalf("server.Read: got (%d, %v), wanted (1, nil)", n, err)
+ }
+ n, err = server.Write(buf)
+ if n != 1 || err != nil {
+ b.Fatalf("server.Write: got (%d, %v), wanted (1, nil)", n, err)
+ }
+ }
+ }()
+ buf := make([]byte, 1)
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ n, err := client.Write(buf)
+ if n != 1 || err != nil {
+ b.Fatalf("client.Write: got (%d, %v), wanted (1, nil)", n, err)
+ }
+ n, err = client.Read(buf)
+ if n != 1 || err != nil {
+ b.Fatalf("client.Read: got (%d, %v), wanted (1, nil)", n, err)
+ }
+ }
+}
+
+func BenchmarkSendRecvStream(b *testing.B) {
+ benchmarkSendRecv(b, false)
+}
+
+func BenchmarkSendRecvPacket(b *testing.B) {
+ benchmarkSendRecv(b, true)
+}
diff --git a/pkg/unet/unet_unsafe.go b/pkg/unet/unet_unsafe.go
index fa0916439..f8a42c914 100644
--- a/pkg/unet/unet_unsafe.go
+++ b/pkg/unet/unet_unsafe.go
@@ -21,7 +21,7 @@ import (
"syscall"
"unsafe"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/abi/linux"
)
// wait blocks until the socket FD is ready for reading or writing, depending
diff --git a/pkg/urpc/BUILD b/pkg/urpc/BUILD
index 0192fb35b..b7f505a84 100644
--- a/pkg/urpc/BUILD
+++ b/pkg/urpc/BUILD
@@ -5,7 +5,7 @@ package(licenses = ["notice"])
go_library(
name = "urpc",
srcs = ["urpc.go"],
- importpath = "gvisor.googlesource.com/gvisor/pkg/urpc",
+ importpath = "gvisor.dev/gvisor/pkg/urpc",
visibility = ["//:sandbox"],
deps = [
"//pkg/fd",
diff --git a/pkg/urpc/urpc.go b/pkg/urpc/urpc.go
index 4ea684659..df59ffab1 100644
--- a/pkg/urpc/urpc.go
+++ b/pkg/urpc/urpc.go
@@ -29,9 +29,9 @@ import (
"runtime"
"sync"
- "gvisor.googlesource.com/gvisor/pkg/fd"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/unet"
+ "gvisor.dev/gvisor/pkg/fd"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/unet"
)
// maxFiles determines the maximum file payload.
@@ -72,7 +72,7 @@ type FilePayload struct {
Files []*os.File `json:"-"`
}
-// ReleaseFD releases the indexth FD.
+// ReleaseFD releases the FD at the specified index.
func (f *FilePayload) ReleaseFD(index int) (*fd.FD, error) {
return fd.NewFromFile(f.Files[index])
}
diff --git a/pkg/urpc/urpc_test.go b/pkg/urpc/urpc_test.go
index 5bf2c5ed2..c6c7ce9d4 100644
--- a/pkg/urpc/urpc_test.go
+++ b/pkg/urpc/urpc_test.go
@@ -19,7 +19,7 @@ import (
"os"
"testing"
- "gvisor.googlesource.com/gvisor/pkg/unet"
+ "gvisor.dev/gvisor/pkg/unet"
)
type test struct {
diff --git a/pkg/waiter/BUILD b/pkg/waiter/BUILD
index 48ce063d7..9173dfd0f 100644
--- a/pkg/waiter/BUILD
+++ b/pkg/waiter/BUILD
@@ -21,7 +21,7 @@ go_library(
"waiter.go",
"waiter_list.go",
],
- importpath = "gvisor.googlesource.com/gvisor/pkg/waiter",
+ importpath = "gvisor.dev/gvisor/pkg/waiter",
visibility = ["//visibility:public"],
)
diff --git a/runsc/BUILD b/runsc/BUILD
index 3d6c92e4c..6b8c92706 100644
--- a/runsc/BUILD
+++ b/runsc/BUILD
@@ -16,6 +16,7 @@ go_binary(
x_defs = {"main.version": "{VERSION}"},
deps = [
"//pkg/log",
+ "//pkg/sentry/platform",
"//runsc/boot",
"//runsc/cmd",
"//runsc/specutils",
@@ -47,6 +48,7 @@ go_binary(
x_defs = {"main.version": "{VERSION}"},
deps = [
"//pkg/log",
+ "//pkg/sentry/platform",
"//runsc/boot",
"//runsc/cmd",
"//runsc/specutils",
@@ -82,7 +84,7 @@ pkg_tar(
genrule(
name = "deb-version",
outs = ["version.txt"],
- cmd = "$(location :runsc) -version | head -n 1 | sed 's/^[^0-9]*//' > $@",
+ cmd = "$(location :runsc) -version | grep 'runsc version' | sed 's/^[^0-9]*//' > $@",
stamp = 1,
tools = [":runsc"],
)
@@ -96,6 +98,11 @@ pkg_deb(
maintainer = "The gVisor Authors <gvisor-dev@googlegroups.com>",
package = "runsc",
postinst = "debian/postinst.sh",
+ tags = [
+ # TODO(b/135475885): pkg_deb requires python2:
+ # https://github.com/bazelbuild/bazel/issues/8443
+ "manual",
+ ],
version_file = ":version.txt",
visibility = [
"//visibility:public",
diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD
index ac28c4339..5025401dd 100644
--- a/runsc/boot/BUILD
+++ b/runsc/boot/BUILD
@@ -16,9 +16,11 @@ go_library(
"limits.go",
"loader.go",
"network.go",
+ "pprof.go",
"strace.go",
+ "user.go",
],
- importpath = "gvisor.googlesource.com/gvisor/runsc/boot",
+ importpath = "gvisor.dev/gvisor/runsc/boot",
visibility = [
"//runsc:__subpackages__",
"//test:__subpackages__",
@@ -32,6 +34,7 @@ go_library(
"//pkg/log",
"//pkg/memutil",
"//pkg/rand",
+ "//pkg/refs",
"//pkg/sentry/arch",
"//pkg/sentry/arch:registers_go_proto",
"//pkg/sentry/context",
@@ -49,13 +52,10 @@ go_library(
"//pkg/sentry/kernel",
"//pkg/sentry/kernel:uncaught_signal_go_proto",
"//pkg/sentry/kernel/auth",
- "//pkg/sentry/kernel/kdefs",
"//pkg/sentry/limits",
"//pkg/sentry/loader",
"//pkg/sentry/pgalloc",
"//pkg/sentry/platform",
- "//pkg/sentry/platform/kvm",
- "//pkg/sentry/platform/ptrace",
"//pkg/sentry/sighandling",
"//pkg/sentry/socket/epsocket",
"//pkg/sentry/socket/hostinet",
@@ -68,6 +68,7 @@ go_library(
"//pkg/sentry/time",
"//pkg/sentry/unimpl:unimplemented_syscall_go_proto",
"//pkg/sentry/usage",
+ "//pkg/sentry/usermem",
"//pkg/sentry/watchdog",
"//pkg/syserror",
"//pkg/tcpip",
@@ -83,6 +84,7 @@ go_library(
"//pkg/tcpip/transport/udp",
"//pkg/urpc",
"//runsc/boot/filter",
+ "//runsc/boot/platforms",
"//runsc/specutils",
"@com_github_golang_protobuf//proto:go_default_library",
"@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
@@ -94,7 +96,9 @@ go_test(
size = "small",
srcs = [
"compat_test.go",
+ "fs_test.go",
"loader_test.go",
+ "user_test.go",
],
embed = [":boot"],
deps = [
diff --git a/runsc/boot/compat.go b/runsc/boot/compat.go
index c369e4d64..07e35ab10 100644
--- a/runsc/boot/compat.go
+++ b/runsc/boot/compat.go
@@ -21,14 +21,14 @@ import (
"syscall"
"github.com/golang/protobuf/proto"
- "gvisor.googlesource.com/gvisor/pkg/abi"
- "gvisor.googlesource.com/gvisor/pkg/eventchannel"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- rpb "gvisor.googlesource.com/gvisor/pkg/sentry/arch/registers_go_proto"
- ucspb "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/uncaught_signal_go_proto"
- "gvisor.googlesource.com/gvisor/pkg/sentry/strace"
- spb "gvisor.googlesource.com/gvisor/pkg/sentry/unimpl/unimplemented_syscall_go_proto"
+ "gvisor.dev/gvisor/pkg/abi"
+ "gvisor.dev/gvisor/pkg/eventchannel"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto"
+ ucspb "gvisor.dev/gvisor/pkg/sentry/kernel/uncaught_signal_go_proto"
+ "gvisor.dev/gvisor/pkg/sentry/strace"
+ spb "gvisor.dev/gvisor/pkg/sentry/unimpl/unimplemented_syscall_go_proto"
)
func initCompatLogs(fd int) error {
diff --git a/runsc/boot/compat_amd64.go b/runsc/boot/compat_amd64.go
index 99df5e614..43cd0db94 100644
--- a/runsc/boot/compat_amd64.go
+++ b/runsc/boot/compat_amd64.go
@@ -17,7 +17,7 @@ package boot
import (
"fmt"
- rpb "gvisor.googlesource.com/gvisor/pkg/sentry/arch/registers_go_proto"
+ rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto"
)
// reportLimit is the max number of events that should be reported per tracker.
diff --git a/runsc/boot/compat_test.go b/runsc/boot/compat_test.go
index ccec3d20c..388298d8d 100644
--- a/runsc/boot/compat_test.go
+++ b/runsc/boot/compat_test.go
@@ -17,7 +17,7 @@ package boot
import (
"testing"
- rpb "gvisor.googlesource.com/gvisor/pkg/sentry/arch/registers_go_proto"
+ rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto"
)
func TestOnceTracker(t *testing.T) {
diff --git a/runsc/boot/config.go b/runsc/boot/config.go
index 8564c502d..6f1eb9a41 100644
--- a/runsc/boot/config.go
+++ b/runsc/boot/config.go
@@ -19,43 +19,9 @@ import (
"strconv"
"strings"
- "gvisor.googlesource.com/gvisor/pkg/sentry/watchdog"
+ "gvisor.dev/gvisor/pkg/sentry/watchdog"
)
-// PlatformType tells which platform to use.
-type PlatformType int
-
-const (
- // PlatformPtrace runs the sandbox with the ptrace platform.
- PlatformPtrace PlatformType = iota
-
- // PlatformKVM runs the sandbox with the KVM platform.
- PlatformKVM
-)
-
-// MakePlatformType converts type from string.
-func MakePlatformType(s string) (PlatformType, error) {
- switch s {
- case "ptrace":
- return PlatformPtrace, nil
- case "kvm":
- return PlatformKVM, nil
- default:
- return 0, fmt.Errorf("invalid platform type %q", s)
- }
-}
-
-func (p PlatformType) String() string {
- switch p {
- case PlatformPtrace:
- return "ptrace"
- case PlatformKVM:
- return "kvm"
- default:
- return fmt.Sprintf("unknown(%d)", p)
- }
-}
-
// FileAccessType tells how the filesystem is accessed.
type FileAccessType int
@@ -187,7 +153,7 @@ type Config struct {
LogPackets bool
// Platform is the platform to run on.
- Platform PlatformType
+ Platform string
// Strace indicates that strace should be enabled.
Strace bool
@@ -226,6 +192,12 @@ type Config struct {
// to the same underlying network device. This allows netstack to better
// scale for high throughput use cases.
NumNetworkChannels int
+
+ // Rootless allows the sandbox to be started with a user that is not root.
+ // Defense is depth measures are weaker with rootless. Specifically, the
+ // sandbox and Gofer process run as root inside a user namespace with root
+ // mapped to the caller's user.
+ Rootless bool
}
// ToFlags returns a slice of flags that correspond to the given Config.
@@ -241,7 +213,7 @@ func (c *Config) ToFlags() []string {
"--overlay=" + strconv.FormatBool(c.Overlay),
"--network=" + c.Network.String(),
"--log-packets=" + strconv.FormatBool(c.LogPackets),
- "--platform=" + c.Platform.String(),
+ "--platform=" + c.Platform,
"--strace=" + strconv.FormatBool(c.Strace),
"--strace-syscalls=" + strings.Join(c.StraceSyscalls, ","),
"--strace-log-size=" + strconv.Itoa(int(c.StraceLogSize)),
@@ -250,6 +222,7 @@ func (c *Config) ToFlags() []string {
"--profile=" + strconv.FormatBool(c.ProfileEnable),
"--net-raw=" + strconv.FormatBool(c.EnableRaw),
"--num-network-channels=" + strconv.Itoa(c.NumNetworkChannels),
+ "--rootless=" + strconv.FormatBool(c.Rootless),
}
if c.TestOnlyAllowRunAsCurrentUserWithoutChroot {
// Only include if set since it is never to be used by users.
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go
index a277145b1..d79aaff60 100644
--- a/runsc/boot/controller.go
+++ b/runsc/boot/controller.go
@@ -22,17 +22,17 @@ import (
"syscall"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/control/server"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/control"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/epsocket"
- "gvisor.googlesource.com/gvisor/pkg/sentry/state"
- "gvisor.googlesource.com/gvisor/pkg/sentry/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/watchdog"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
- "gvisor.googlesource.com/gvisor/pkg/urpc"
+ "gvisor.dev/gvisor/pkg/control/server"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/socket/epsocket"
+ "gvisor.dev/gvisor/pkg/sentry/state"
+ "gvisor.dev/gvisor/pkg/sentry/time"
+ "gvisor.dev/gvisor/pkg/sentry/watchdog"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/urpc"
)
const (
@@ -96,8 +96,10 @@ const (
// SandboxStacks collects sandbox stacks for debugging.
SandboxStacks = "debug.Stacks"
+)
- // Profiling related commands (see pprof.go for more details).
+// Profiling related commands (see pprof.go for more details).
+const (
StartCPUProfile = "Profile.StartCPUProfile"
StopCPUProfile = "Profile.StopCPUProfile"
HeapProfile = "Profile.HeapProfile"
@@ -105,6 +107,11 @@ const (
StopTrace = "Profile.StopTrace"
)
+// Logging related commands (see logging.go for more details).
+const (
+ ChangeLogging = "Logging.Change"
+)
+
// ControlSocketAddr generates an abstract unix socket name for the given ID.
func ControlSocketAddr(id string) string {
return fmt.Sprintf("\x00runsc-sandbox.%s", id)
@@ -143,6 +150,7 @@ func newController(fd int, l *Loader) (*controller, error) {
}
srv.Register(&debug{})
+ srv.Register(&control.Logging{})
if l.conf.ProfileEnable {
srv.Register(&control.Profile{})
}
@@ -340,7 +348,7 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error {
cm.l.k = k
// Set up the restore environment.
- mntr := newContainerMounter(cm.l.spec, "", cm.l.goferFDs, cm.l.k)
+ mntr := newContainerMounter(cm.l.spec, "", cm.l.goferFDs, cm.l.k, cm.l.mountHints)
renv, err := mntr.createRestoreEnvironment(cm.l.conf)
if err != nil {
return fmt.Errorf("creating RestoreEnvironment: %v", err)
@@ -359,6 +367,17 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error {
return fmt.Errorf("file cannot be empty")
}
+ if cm.l.conf.ProfileEnable {
+ // initializePProf opens /proc/self/maps, so has to be
+ // called before installing seccomp filters.
+ initializePProf()
+ }
+
+ // Seccomp filters have to be applied before parsing the state file.
+ if err := cm.l.installSeccompFilters(); err != nil {
+ return err
+ }
+
// Load the state.
loadOpts := state.LoadOpts{Source: specFile}
if err := loadOpts.Load(k, networkStack); err != nil {
diff --git a/runsc/boot/debug.go b/runsc/boot/debug.go
index 79f7387ac..1fb32c527 100644
--- a/runsc/boot/debug.go
+++ b/runsc/boot/debug.go
@@ -15,7 +15,7 @@
package boot
import (
- "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/log"
)
type debug struct {
diff --git a/runsc/boot/events.go b/runsc/boot/events.go
index ffd99f5e9..422f4da00 100644
--- a/runsc/boot/events.go
+++ b/runsc/boot/events.go
@@ -15,8 +15,8 @@
package boot
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
)
// Event struct for encoding the event data to JSON. Corresponds to runc's
diff --git a/runsc/boot/fds.go b/runsc/boot/fds.go
index 0811e10f4..e5de1f3d7 100644
--- a/runsc/boot/fds.go
+++ b/runsc/boot/fds.go
@@ -17,36 +17,27 @@ package boot
import (
"fmt"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/host"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/limits"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/host"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
)
-// createFDMap creates an FD map that contains stdin, stdout, and stderr. If
-// console is true, then ioctl calls will be passed through to the host FD.
+// createFDTable creates an FD table that contains stdin, stdout, and stderr.
+// If console is true, then ioctl calls will be passed through to the host FD.
// Upon success, createFDMap dups then closes stdioFDs.
-func createFDMap(ctx context.Context, l *limits.LimitSet, console bool, stdioFDs []int) (*kernel.FDMap, error) {
+func createFDTable(ctx context.Context, console bool, stdioFDs []int) (*kernel.FDTable, error) {
if len(stdioFDs) != 3 {
return nil, fmt.Errorf("stdioFDs should contain exactly 3 FDs (stdin, stdout, and stderr), but %d FDs received", len(stdioFDs))
}
k := kernel.KernelFromContext(ctx)
- fdm := k.NewFDMap()
- defer fdm.DecRef()
+ fdTable := k.NewFDTable()
+ defer fdTable.DecRef()
mounter := fs.FileOwnerFromContext(ctx)
- // Maps sandbox FD to host FD.
- fdMap := map[int]int{
- 0: stdioFDs[0],
- 1: stdioFDs[1],
- 2: stdioFDs[2],
- }
-
var ttyFile *fs.File
- for appFD, hostFD := range fdMap {
+ for appFD, hostFD := range stdioFDs {
var appFile *fs.File
if console && appFD < 3 {
@@ -80,11 +71,11 @@ func createFDMap(ctx context.Context, l *limits.LimitSet, console bool, stdioFDs
}
// Add the file to the FD map.
- if err := fdm.NewFDAt(kdefs.FD(appFD), appFile, kernel.FDFlags{}, l); err != nil {
+ if err := fdTable.NewFDAt(ctx, int32(appFD), appFile, kernel.FDFlags{}); err != nil {
return nil, err
}
}
- fdm.IncRef()
- return fdm, nil
+ fdTable.IncRef()
+ return fdTable, nil
}
diff --git a/runsc/boot/filter/BUILD b/runsc/boot/filter/BUILD
index 3b6020cf3..f5509b6b7 100644
--- a/runsc/boot/filter/BUILD
+++ b/runsc/boot/filter/BUILD
@@ -11,7 +11,7 @@ go_library(
"extra_filters_race.go",
"filter.go",
],
- importpath = "gvisor.googlesource.com/gvisor/runsc/boot/filter",
+ importpath = "gvisor.dev/gvisor/runsc/boot/filter",
visibility = [
"//runsc/boot:__subpackages__",
],
@@ -20,8 +20,6 @@ go_library(
"//pkg/log",
"//pkg/seccomp",
"//pkg/sentry/platform",
- "//pkg/sentry/platform/kvm",
- "//pkg/sentry/platform/ptrace",
"//pkg/tcpip/link/fdbased",
"@org_golang_x_sys//unix:go_default_library",
],
diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go
index ef2dbfad2..0ee5b8bbd 100644
--- a/runsc/boot/filter/config.go
+++ b/runsc/boot/filter/config.go
@@ -19,9 +19,9 @@ import (
"syscall"
"golang.org/x/sys/unix"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/seccomp"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/fdbased"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/seccomp"
+ "gvisor.dev/gvisor/pkg/tcpip/link/fdbased"
)
// allowedSyscalls is the set of syscalls executed by the Sentry to the host OS.
@@ -437,29 +437,6 @@ func hostInetFilters() seccomp.SyscallRules {
}
}
-// ptraceFilters returns syscalls made exclusively by the ptrace platform.
-func ptraceFilters() seccomp.SyscallRules {
- return seccomp.SyscallRules{
- unix.SYS_GETCPU: {},
- unix.SYS_SCHED_SETAFFINITY: {},
- syscall.SYS_PTRACE: {},
- syscall.SYS_TGKILL: {},
- syscall.SYS_WAIT4: {},
- }
-}
-
-// kvmFilters returns syscalls made exclusively by the KVM platform.
-func kvmFilters() seccomp.SyscallRules {
- return seccomp.SyscallRules{
- syscall.SYS_ARCH_PRCTL: {},
- syscall.SYS_IOCTL: {},
- syscall.SYS_MMAP: {},
- syscall.SYS_RT_SIGSUSPEND: {},
- syscall.SYS_RT_SIGTIMEDWAIT: {},
- 0xffffffffffffffff: {}, // KVM uses syscall -1 to transition to host.
- }
-}
-
func controlServerFilters(fd int) seccomp.SyscallRules {
return seccomp.SyscallRules{
syscall.SYS_ACCEPT: []seccomp.Rule{
diff --git a/runsc/boot/filter/extra_filters.go b/runsc/boot/filter/extra_filters.go
index 5c5ec4e06..e28d4b8d6 100644
--- a/runsc/boot/filter/extra_filters.go
+++ b/runsc/boot/filter/extra_filters.go
@@ -17,11 +17,11 @@
package filter
import (
- "gvisor.googlesource.com/gvisor/pkg/seccomp"
+ "gvisor.dev/gvisor/pkg/seccomp"
)
// instrumentationFilters returns additional filters for syscalls used by
-// Go intrumentation tools, e.g. -race, -msan.
+// Go instrumentation tools, e.g. -race, -msan.
// Returns empty when disabled.
func instrumentationFilters() seccomp.SyscallRules {
return nil
diff --git a/runsc/boot/filter/extra_filters_msan.go b/runsc/boot/filter/extra_filters_msan.go
index ac5a0f1aa..5e5a3c998 100644
--- a/runsc/boot/filter/extra_filters_msan.go
+++ b/runsc/boot/filter/extra_filters_msan.go
@@ -19,7 +19,7 @@ package filter
import (
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/seccomp"
+ "gvisor.dev/gvisor/pkg/seccomp"
)
// instrumentationFilters returns additional filters for syscalls used by MSAN.
diff --git a/runsc/boot/filter/extra_filters_race.go b/runsc/boot/filter/extra_filters_race.go
index ba3c1ce87..9ff80276a 100644
--- a/runsc/boot/filter/extra_filters_race.go
+++ b/runsc/boot/filter/extra_filters_race.go
@@ -19,7 +19,7 @@ package filter
import (
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/seccomp"
+ "gvisor.dev/gvisor/pkg/seccomp"
)
// instrumentationFilters returns additional filters for syscalls used by TSAN.
@@ -33,6 +33,7 @@ func instrumentationFilters() seccomp.SyscallRules {
syscall.SYS_MUNLOCK: {},
syscall.SYS_NANOSLEEP: {},
syscall.SYS_OPEN: {},
+ syscall.SYS_OPENAT: {},
syscall.SYS_SET_ROBUST_LIST: {},
// Used within glibc's malloc.
syscall.SYS_TIME: {},
diff --git a/runsc/boot/filter/filter.go b/runsc/boot/filter/filter.go
index 17479e0dd..e80c171b3 100644
--- a/runsc/boot/filter/filter.go
+++ b/runsc/boot/filter/filter.go
@@ -18,13 +18,9 @@
package filter
import (
- "fmt"
-
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/seccomp"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/kvm"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ptrace"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/seccomp"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
)
// Options are seccomp filter related options.
@@ -53,14 +49,7 @@ func Install(opt Options) error {
s.Merge(profileFilters())
}
- switch p := opt.Platform.(type) {
- case *ptrace.PTrace:
- s.Merge(ptraceFilters())
- case *kvm.KVM:
- s.Merge(kvmFilters())
- default:
- return fmt.Errorf("unknown platform type %T", p)
- }
+ s.Merge(opt.Platform.SyscallFilters())
return seccomp.Install(s)
}
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go
index 939f2419c..d3e3196fd 100644
--- a/runsc/boot/fs.go
+++ b/runsc/boot/fs.go
@@ -18,29 +18,30 @@ import (
"fmt"
"path"
"path/filepath"
+ "sort"
"strconv"
"strings"
"syscall"
// Include filesystem types that OCI spec might mount.
- _ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/dev"
- _ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/host"
- _ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc"
- _ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/sys"
- _ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/tmpfs"
- _ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/tty"
+ _ "gvisor.dev/gvisor/pkg/sentry/fs/dev"
+ "gvisor.dev/gvisor/pkg/sentry/fs/gofer"
+ _ "gvisor.dev/gvisor/pkg/sentry/fs/host"
+ _ "gvisor.dev/gvisor/pkg/sentry/fs/proc"
+ "gvisor.dev/gvisor/pkg/sentry/fs/ramfs"
+ _ "gvisor.dev/gvisor/pkg/sentry/fs/sys"
+ _ "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs"
+ _ "gvisor.dev/gvisor/pkg/sentry/fs/tty"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/gofer"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/runsc/specutils"
)
const (
@@ -50,6 +51,9 @@ const (
// Device name for root mount.
rootDevice = "9pfs-/"
+ // MountPrefix is the annotation prefix for mount hints.
+ MountPrefix = "gvisor.dev/spec/mount"
+
// ChildContainersDir is the directory where child container root
// filesystems are mounted.
ChildContainersDir = "/__runsc_containers__"
@@ -72,7 +76,7 @@ func addOverlay(ctx context.Context, conf *Config, lower *fs.Inode, name string,
tmpFS := mustFindFilesystem("tmpfs")
if !fs.IsDir(lower.StableAttr) {
// Create overlay on top of mount file, e.g. /etc/hostname.
- msrc := fs.NewCachingMountSource(tmpFS, upperFlags)
+ msrc := fs.NewCachingMountSource(ctx, tmpFS, upperFlags)
return fs.NewOverlayRootFile(ctx, msrc, lower, upperFlags)
}
@@ -222,7 +226,11 @@ func mustFindFilesystem(name string) fs.Filesystem {
// addSubmountOverlay overlays the inode over a ramfs tree containing the given
// paths.
func addSubmountOverlay(ctx context.Context, inode *fs.Inode, submounts []string) (*fs.Inode, error) {
- msrc := fs.NewPseudoMountSource()
+ // Construct a ramfs tree of mount points. The contents never
+ // change, so this can be fully caching. There's no real
+ // filesystem backing this tree, so we set the filesystem to
+ // nil.
+ msrc := fs.NewCachingMountSource(ctx, nil, fs.MountSourceFlags{})
mountTree, err := ramfs.MakeDirectoryTree(ctx, msrc, submounts)
if err != nil {
return nil, fmt.Errorf("creating mount tree: %v", err)
@@ -292,6 +300,174 @@ func (f *fdDispenser) empty() bool {
return len(f.fds) == 0
}
+type shareType int
+
+const (
+ invalid shareType = iota
+
+ // container shareType indicates that the mount is used by a single container.
+ container
+
+ // pod shareType indicates that the mount is used by more than one container
+ // inside the pod.
+ pod
+
+ // shared shareType indicates that the mount can also be shared with a process
+ // outside the pod, e.g. NFS.
+ shared
+)
+
+func parseShare(val string) (shareType, error) {
+ switch val {
+ case "container":
+ return container, nil
+ case "pod":
+ return pod, nil
+ case "shared":
+ return shared, nil
+ default:
+ return 0, fmt.Errorf("invalid share value %q", val)
+ }
+}
+
+func (s shareType) String() string {
+ switch s {
+ case invalid:
+ return "invalid"
+ case container:
+ return "container"
+ case pod:
+ return "pod"
+ case shared:
+ return "shared"
+ default:
+ return fmt.Sprintf("invalid share value %d", s)
+ }
+}
+
+// mountHint represents extra information about mounts that are provided via
+// annotations. They can override mount type, and provide sharing information
+// so that mounts can be correctly shared inside the pod.
+type mountHint struct {
+ name string
+ share shareType
+ mount specs.Mount
+
+ // root is the inode where the volume is mounted. For mounts with 'pod' share
+ // the volume is mounted once and then bind mounted inside the containers.
+ root *fs.Inode
+}
+
+func (m *mountHint) setField(key, val string) error {
+ switch key {
+ case "source":
+ if len(val) == 0 {
+ return fmt.Errorf("source cannot be empty")
+ }
+ m.mount.Source = val
+ case "type":
+ return m.setType(val)
+ case "share":
+ share, err := parseShare(val)
+ if err != nil {
+ return err
+ }
+ m.share = share
+ case "options":
+ return m.setOptions(val)
+ default:
+ return fmt.Errorf("invalid mount annotation: %s=%s", key, val)
+ }
+ return nil
+}
+
+func (m *mountHint) setType(val string) error {
+ switch val {
+ case "tmpfs", "bind":
+ m.mount.Type = val
+ default:
+ return fmt.Errorf("invalid type %q", val)
+ }
+ return nil
+}
+
+func (m *mountHint) setOptions(val string) error {
+ opts := strings.Split(val, ",")
+ if err := specutils.ValidateMountOptions(opts); err != nil {
+ return err
+ }
+ // Sort options so it can be compared with container mount options later on.
+ sort.Strings(opts)
+ m.mount.Options = opts
+ return nil
+}
+
+func (m *mountHint) isSupported() bool {
+ return m.mount.Type == tmpfs && m.share == pod
+}
+
+// podMountHints contains a collection of mountHints for the pod.
+type podMountHints struct {
+ mounts map[string]*mountHint
+}
+
+func newPodMountHints(spec *specs.Spec) (*podMountHints, error) {
+ mnts := make(map[string]*mountHint)
+ for k, v := range spec.Annotations {
+ // Look for 'gvisor.dev/spec/mount' annotations and parse them.
+ if strings.HasPrefix(k, MountPrefix) {
+ parts := strings.Split(k, "/")
+ if len(parts) != 5 {
+ return nil, fmt.Errorf("invalid mount annotation: %s=%s", k, v)
+ }
+ name := parts[3]
+ if len(name) == 0 || path.Clean(name) != name {
+ return nil, fmt.Errorf("invalid mount name: %s", name)
+ }
+ mnt := mnts[name]
+ if mnt == nil {
+ mnt = &mountHint{name: name}
+ mnts[name] = mnt
+ }
+ if err := mnt.setField(parts[4], v); err != nil {
+ return nil, err
+ }
+ }
+ }
+
+ // Validate all hints after done parsing.
+ for name, m := range mnts {
+ log.Infof("Mount annotation found, name: %s, source: %q, type: %s, share: %v", name, m.mount.Source, m.mount.Type, m.share)
+ if m.share == invalid {
+ return nil, fmt.Errorf("share field for %q has not been set", m.name)
+ }
+ if len(m.mount.Source) == 0 {
+ return nil, fmt.Errorf("source field for %q has not been set", m.name)
+ }
+ if len(m.mount.Type) == 0 {
+ return nil, fmt.Errorf("type field for %q has not been set", m.name)
+ }
+
+ // Check for duplicate mount sources.
+ for name2, m2 := range mnts {
+ if name != name2 && m.mount.Source == m2.mount.Source {
+ return nil, fmt.Errorf("mounts %q and %q have the same mount source %q", m.name, m2.name, m.mount.Source)
+ }
+ }
+ }
+
+ return &podMountHints{mounts: mnts}, nil
+}
+
+func (p *podMountHints) findMount(mount specs.Mount) *mountHint {
+ for _, m := range p.mounts {
+ if m.mount.Source == mount.Source {
+ return m
+ }
+ }
+ return nil
+}
+
type containerMounter struct {
// cid is the container ID. May be set to empty for the root container.
cid string
@@ -306,15 +482,18 @@ type containerMounter struct {
fds fdDispenser
k *kernel.Kernel
+
+ hints *podMountHints
}
-func newContainerMounter(spec *specs.Spec, cid string, goferFDs []int, k *kernel.Kernel) *containerMounter {
+func newContainerMounter(spec *specs.Spec, cid string, goferFDs []int, k *kernel.Kernel, hints *podMountHints) *containerMounter {
return &containerMounter{
cid: cid,
root: spec.Root,
mounts: compileMounts(spec),
fds: fdDispenser{fds: goferFDs},
k: k,
+ hints: hints,
}
}
@@ -476,6 +655,15 @@ func destroyContainerFS(ctx context.Context, cid string, k *kernel.Kernel) error
// 'setMountNS' is called after namespace is created. It must set the mount NS
// to 'rootCtx'.
func (c *containerMounter) setupRootContainer(userCtx context.Context, rootCtx context.Context, conf *Config, setMountNS func(*fs.MountNamespace)) error {
+ for _, hint := range c.hints.mounts {
+ log.Infof("Mounting master of shared mount %q from %q type %q", hint.name, hint.mount.Source, hint.mount.Type)
+ inode, err := c.mountSharedMaster(rootCtx, conf, hint)
+ if err != nil {
+ return fmt.Errorf("mounting shared master %q: %v", hint.name, err)
+ }
+ hint.root = inode
+ }
+
// Create a tmpfs mount where we create and mount a root filesystem for
// each child container.
c.mounts = append(c.mounts, specs.Mount{
@@ -498,21 +686,57 @@ func (c *containerMounter) setupRootContainer(userCtx context.Context, rootCtx c
return c.mountSubmounts(rootCtx, conf, mns, root)
}
+// mountSharedMaster mounts the master of a volume that is shared among
+// containers in a pod. It returns the root mount's inode.
+func (c *containerMounter) mountSharedMaster(ctx context.Context, conf *Config, hint *mountHint) (*fs.Inode, error) {
+ // Map mount type to filesystem name, and parse out the options that we are
+ // capable of dealing with.
+ fsName, opts, useOverlay, err := c.getMountNameAndOptions(conf, hint.mount)
+ if err != nil {
+ return nil, err
+ }
+ if len(fsName) == 0 {
+ return nil, fmt.Errorf("mount type not supported %q", hint.mount.Type)
+ }
+
+ // Mount with revalidate because it's shared among containers.
+ opts = append(opts, "cache=revalidate")
+
+ // All filesystem names should have been mapped to something we know.
+ filesystem := mustFindFilesystem(fsName)
+
+ mf := mountFlags(hint.mount.Options)
+ if useOverlay {
+ // All writes go to upper, be paranoid and make lower readonly.
+ mf.ReadOnly = true
+ }
+
+ inode, err := filesystem.Mount(ctx, mountDevice(hint.mount), mf, strings.Join(opts, ","), nil)
+ if err != nil {
+ return nil, fmt.Errorf("creating mount %q: %v", hint.name, err)
+ }
+
+ if useOverlay {
+ log.Debugf("Adding overlay on top of shared mount %q", hint.name)
+ inode, err = addOverlay(ctx, conf, inode, hint.mount.Type, mf)
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ return inode, nil
+}
+
// createRootMount creates the root filesystem.
func (c *containerMounter) createRootMount(ctx context.Context, conf *Config) (*fs.Inode, error) {
// First construct the filesystem from the spec.Root.
mf := fs.MountSourceFlags{ReadOnly: c.root.Readonly || conf.Overlay}
- var (
- rootInode *fs.Inode
- err error
- )
-
fd := c.fds.remove()
log.Infof("Mounting root over 9P, ioFD: %d", fd)
p9FS := mustFindFilesystem("9p")
opts := p9MountOptions(fd, conf.FileAccess)
- rootInode, err = p9FS.Mount(ctx, rootDevice, mf, strings.Join(opts, ","), nil)
+ rootInode, err := p9FS.Mount(ctx, rootDevice, mf, strings.Join(opts, ","), nil)
if err != nil {
return nil, fmt.Errorf("creating root mount point: %v", err)
}
@@ -579,8 +803,14 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) (
func (c *containerMounter) mountSubmounts(ctx context.Context, conf *Config, mns *fs.MountNamespace, root *fs.Dirent) error {
for _, m := range c.mounts {
- if err := c.mountSubmount(ctx, conf, mns, root, m); err != nil {
- return fmt.Errorf("mount submount %q: %v", m.Destination, err)
+ if hint := c.hints.findMount(m); hint != nil && hint.isSupported() {
+ if err := c.mountSharedSubmount(ctx, mns, root, m, hint); err != nil {
+ return fmt.Errorf("mount shared mount %q to %q: %v", hint.name, m.Destination, err)
+ }
+ } else {
+ if err := c.mountSubmount(ctx, conf, mns, root, m); err != nil {
+ return fmt.Errorf("mount submount %q: %v", m.Destination, err)
+ }
}
}
@@ -653,6 +883,40 @@ func (c *containerMounter) mountSubmount(ctx context.Context, conf *Config, mns
return nil
}
+// mountSharedSubmount binds mount to a previously mounted volume that is shared
+// among containers in the same pod.
+func (c *containerMounter) mountSharedSubmount(ctx context.Context, mns *fs.MountNamespace, root *fs.Dirent, mount specs.Mount, source *mountHint) error {
+ // For now enforce that all options are the same. Once bind mount is properly
+ // supported, then we should ensure the master is less restrictive than the
+ // container, e.g. master can be 'rw' while container mounts as 'ro'.
+ if len(mount.Options) != len(source.mount.Options) {
+ return fmt.Errorf("mount options in annotations differ from container mount, annotation: %s, mount: %s", source.mount.Options, mount.Options)
+ }
+ sort.Strings(mount.Options)
+ for i, opt := range mount.Options {
+ if opt != source.mount.Options[i] {
+ return fmt.Errorf("mount options in annotations differ from container mount, annotation: %s, mount: %s", source.mount.Options, mount.Options)
+ }
+ }
+
+ maxTraversals := uint(0)
+ target, err := mns.FindInode(ctx, root, root, mount.Destination, &maxTraversals)
+ if err != nil {
+ return fmt.Errorf("can't find mount destination %q: %v", mount.Destination, err)
+ }
+ defer target.DecRef()
+
+ // Take a ref on the inode that is about to be (re)-mounted.
+ source.root.IncRef()
+ if err := mns.Mount(ctx, target, source.root); err != nil {
+ source.root.DecRef()
+ return fmt.Errorf("bind mount %q error: %v", mount.Destination, err)
+ }
+
+ log.Infof("Mounted %q type shared bind to %q", mount.Destination, source.name)
+ return nil
+}
+
// addRestoreMount adds a mount to the MountSources map used for restoring a
// checkpointed container.
func (c *containerMounter) addRestoreMount(conf *Config, renv *fs.RestoreEnvironment, m specs.Mount) error {
@@ -678,8 +942,8 @@ func (c *containerMounter) addRestoreMount(conf *Config, renv *fs.RestoreEnviron
return nil
}
-// createRestoreEnvironment builds a fs.RestoreEnvironment called renv by adding the mounts
-// to the environment.
+// createRestoreEnvironment builds a fs.RestoreEnvironment called renv by adding
+// the mounts to the environment.
func (c *containerMounter) createRestoreEnvironment(conf *Config) (*fs.RestoreEnvironment, error) {
renv := &fs.RestoreEnvironment{
MountSources: make(map[string][]fs.MountArgs),
@@ -730,7 +994,7 @@ func (c *containerMounter) createRestoreEnvironment(conf *Config) (*fs.RestoreEn
// Technically we don't have to mount tmpfs at /tmp, as we could just rely on
// the host /tmp, but this is a nice optimization, and fixes some apps that call
// mknod in /tmp. It's unsafe to mount tmpfs if:
-// 1. /tmp is mounted explictly: we should not override user's wish
+// 1. /tmp is mounted explicitly: we should not override user's wish
// 2. /tmp is not empty: mounting tmpfs would hide existing files in /tmp
//
// Note that when there are submounts inside of '/tmp', directories for the
diff --git a/runsc/boot/fs_test.go b/runsc/boot/fs_test.go
new file mode 100644
index 000000000..49ab34b33
--- /dev/null
+++ b/runsc/boot/fs_test.go
@@ -0,0 +1,193 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package boot
+
+import (
+ "path"
+ "reflect"
+ "strings"
+ "testing"
+
+ specs "github.com/opencontainers/runtime-spec/specs-go"
+)
+
+func TestPodMountHintsHappy(t *testing.T) {
+ spec := &specs.Spec{
+ Annotations: map[string]string{
+ path.Join(MountPrefix, "mount1", "source"): "foo",
+ path.Join(MountPrefix, "mount1", "type"): "tmpfs",
+ path.Join(MountPrefix, "mount1", "share"): "pod",
+
+ path.Join(MountPrefix, "mount2", "source"): "bar",
+ path.Join(MountPrefix, "mount2", "type"): "bind",
+ path.Join(MountPrefix, "mount2", "share"): "container",
+ path.Join(MountPrefix, "mount2", "options"): "rw,private",
+ },
+ }
+ podHints, err := newPodMountHints(spec)
+ if err != nil {
+ t.Errorf("newPodMountHints failed: %v", err)
+ }
+
+ // Check that fields were set correctly.
+ mount1 := podHints.mounts["mount1"]
+ if want := "mount1"; want != mount1.name {
+ t.Errorf("mount1 name, want: %q, got: %q", want, mount1.name)
+ }
+ if want := "foo"; want != mount1.mount.Source {
+ t.Errorf("mount1 source, want: %q, got: %q", want, mount1.mount.Source)
+ }
+ if want := "tmpfs"; want != mount1.mount.Type {
+ t.Errorf("mount1 type, want: %q, got: %q", want, mount1.mount.Type)
+ }
+ if want := pod; want != mount1.share {
+ t.Errorf("mount1 type, want: %q, got: %q", want, mount1.share)
+ }
+ if want := []string(nil); !reflect.DeepEqual(want, mount1.mount.Options) {
+ t.Errorf("mount1 type, want: %q, got: %q", want, mount1.mount.Options)
+ }
+
+ mount2 := podHints.mounts["mount2"]
+ if want := "mount2"; want != mount2.name {
+ t.Errorf("mount2 name, want: %q, got: %q", want, mount2.name)
+ }
+ if want := "bar"; want != mount2.mount.Source {
+ t.Errorf("mount2 source, want: %q, got: %q", want, mount2.mount.Source)
+ }
+ if want := "bind"; want != mount2.mount.Type {
+ t.Errorf("mount2 type, want: %q, got: %q", want, mount2.mount.Type)
+ }
+ if want := container; want != mount2.share {
+ t.Errorf("mount2 type, want: %q, got: %q", want, mount2.share)
+ }
+ if want := []string{"private", "rw"}; !reflect.DeepEqual(want, mount2.mount.Options) {
+ t.Errorf("mount2 type, want: %q, got: %q", want, mount2.mount.Options)
+ }
+}
+
+func TestPodMountHintsErrors(t *testing.T) {
+ for _, tst := range []struct {
+ name string
+ annotations map[string]string
+ error string
+ }{
+ {
+ name: "too short",
+ annotations: map[string]string{
+ path.Join(MountPrefix, "mount1"): "foo",
+ },
+ error: "invalid mount annotation",
+ },
+ {
+ name: "no name",
+ annotations: map[string]string{
+ MountPrefix + "//source": "foo",
+ },
+ error: "invalid mount name",
+ },
+ {
+ name: "missing source",
+ annotations: map[string]string{
+ path.Join(MountPrefix, "mount1", "type"): "tmpfs",
+ path.Join(MountPrefix, "mount1", "share"): "pod",
+ },
+ error: "source field",
+ },
+ {
+ name: "missing type",
+ annotations: map[string]string{
+ path.Join(MountPrefix, "mount1", "source"): "foo",
+ path.Join(MountPrefix, "mount1", "share"): "pod",
+ },
+ error: "type field",
+ },
+ {
+ name: "missing share",
+ annotations: map[string]string{
+ path.Join(MountPrefix, "mount1", "source"): "foo",
+ path.Join(MountPrefix, "mount1", "type"): "tmpfs",
+ },
+ error: "share field",
+ },
+ {
+ name: "invalid field name",
+ annotations: map[string]string{
+ path.Join(MountPrefix, "mount1", "invalid"): "foo",
+ },
+ error: "invalid mount annotation",
+ },
+ {
+ name: "invalid source",
+ annotations: map[string]string{
+ path.Join(MountPrefix, "mount1", "source"): "",
+ path.Join(MountPrefix, "mount1", "type"): "tmpfs",
+ path.Join(MountPrefix, "mount1", "share"): "pod",
+ },
+ error: "source cannot be empty",
+ },
+ {
+ name: "invalid type",
+ annotations: map[string]string{
+ path.Join(MountPrefix, "mount1", "source"): "foo",
+ path.Join(MountPrefix, "mount1", "type"): "invalid-type",
+ path.Join(MountPrefix, "mount1", "share"): "pod",
+ },
+ error: "invalid type",
+ },
+ {
+ name: "invalid share",
+ annotations: map[string]string{
+ path.Join(MountPrefix, "mount1", "source"): "foo",
+ path.Join(MountPrefix, "mount1", "type"): "tmpfs",
+ path.Join(MountPrefix, "mount1", "share"): "invalid-share",
+ },
+ error: "invalid share",
+ },
+ {
+ name: "invalid options",
+ annotations: map[string]string{
+ path.Join(MountPrefix, "mount1", "source"): "foo",
+ path.Join(MountPrefix, "mount1", "type"): "tmpfs",
+ path.Join(MountPrefix, "mount1", "share"): "pod",
+ path.Join(MountPrefix, "mount1", "options"): "invalid-option",
+ },
+ error: "unknown mount option",
+ },
+ {
+ name: "duplicate source",
+ annotations: map[string]string{
+ path.Join(MountPrefix, "mount1", "source"): "foo",
+ path.Join(MountPrefix, "mount1", "type"): "tmpfs",
+ path.Join(MountPrefix, "mount1", "share"): "pod",
+
+ path.Join(MountPrefix, "mount2", "source"): "foo",
+ path.Join(MountPrefix, "mount2", "type"): "bind",
+ path.Join(MountPrefix, "mount2", "share"): "container",
+ },
+ error: "have the same mount source",
+ },
+ } {
+ t.Run(tst.name, func(t *testing.T) {
+ spec := &specs.Spec{Annotations: tst.annotations}
+ podHints, err := newPodMountHints(spec)
+ if err == nil || !strings.Contains(err.Error(), tst.error) {
+ t.Errorf("newPodMountHints invalid error, want: .*%s.*, got: %v", tst.error, err)
+ }
+ if podHints != nil {
+ t.Errorf("newPodMountHints must return nil on failure: %+v", podHints)
+ }
+ })
+ }
+}
diff --git a/runsc/boot/limits.go b/runsc/boot/limits.go
index 3364aa5e6..d1c0bb9b5 100644
--- a/runsc/boot/limits.go
+++ b/runsc/boot/limits.go
@@ -20,8 +20,8 @@ import (
"syscall"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/limits"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/limits"
)
// Mapping from linux resource names to limits.LimitType.
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 42bddb2e8..8e8c6105b 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -20,51 +20,52 @@ import (
mrand "math/rand"
"os"
"runtime"
+ "strings"
"sync"
"sync/atomic"
"syscall"
gtime "time"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/cpuid"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/memutil"
- "gvisor.googlesource.com/gvisor/pkg/rand"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/control"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/host"
- "gvisor.googlesource.com/gvisor/pkg/sentry/inet"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/sentry/loader"
- "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/kvm"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ptrace"
- "gvisor.googlesource.com/gvisor/pkg/sentry/sighandling"
- slinux "gvisor.googlesource.com/gvisor/pkg/sentry/syscalls/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
- "gvisor.googlesource.com/gvisor/pkg/sentry/watchdog"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sniffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/arp"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv6"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/icmp"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/tcp"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/transport/udp"
- "gvisor.googlesource.com/gvisor/runsc/boot/filter"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/cpuid"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/memutil"
+ "gvisor.dev/gvisor/pkg/rand"
+ "gvisor.dev/gvisor/pkg/refs"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/pkg/sentry/fs/host"
+ "gvisor.dev/gvisor/pkg/sentry/inet"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/loader"
+ "gvisor.dev/gvisor/pkg/sentry/pgalloc"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/sighandling"
+ slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux"
+ "gvisor.dev/gvisor/pkg/sentry/time"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/sentry/watchdog"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/link/sniffer"
+ "gvisor.dev/gvisor/pkg/tcpip/network/arp"
+ "gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
+ "gvisor.dev/gvisor/pkg/tcpip/network/ipv6"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/icmp"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/tcp"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/udp"
+ "gvisor.dev/gvisor/runsc/boot/filter"
+ _ "gvisor.dev/gvisor/runsc/boot/platforms" // register all platforms.
+ "gvisor.dev/gvisor/runsc/specutils"
// Include supported socket providers.
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/epsocket"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/hostinet"
- _ "gvisor.googlesource.com/gvisor/pkg/sentry/socket/netlink"
- _ "gvisor.googlesource.com/gvisor/pkg/sentry/socket/netlink/route"
- _ "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix"
+ "gvisor.dev/gvisor/pkg/sentry/socket/epsocket"
+ "gvisor.dev/gvisor/pkg/sentry/socket/hostinet"
+ _ "gvisor.dev/gvisor/pkg/sentry/socket/netlink"
+ _ "gvisor.dev/gvisor/pkg/sentry/socket/netlink/route"
+ _ "gvisor.dev/gvisor/pkg/sentry/socket/unix"
)
// Loader keeps state needed to start the kernel and run the container..
@@ -117,6 +118,10 @@ type Loader struct {
//
// processes is guardded by mu.
processes map[execID]*execProcess
+
+ // mountHints provides extra information about mounts for containers that
+ // apply to the entire pod.
+ mountHints *podMountHints
}
// execID uniquely identifies a sentry process that is executed in a container.
@@ -201,7 +206,9 @@ func New(args Args) (*Loader, error) {
// Create VDSO.
//
// Pass k as the platform since it is savable, unlike the actual platform.
- vdso, err := loader.PrepareVDSO(k)
+ //
+ // FIXME(b/109889800): Use non-nil context.
+ vdso, err := loader.PrepareVDSO(nil, k)
if err != nil {
return nil, fmt.Errorf("creating vdso: %v", err)
}
@@ -255,7 +262,7 @@ func New(args Args) (*Loader, error) {
// Adjust the total memory returned by the Sentry so that applications that
// use /proc/meminfo can make allocations based on this limit.
usage.MinimumTotalMemoryBytes = args.TotalMem
- log.Infof("Setting total memory to %.2f GB", float64(args.TotalMem)/(2^30))
+ log.Infof("Setting total memory to %.2f GB", float64(args.TotalMem)/(1<<30))
}
// Initiate the Kernel object, which is required by the Context passed
@@ -299,6 +306,11 @@ func New(args Args) (*Loader, error) {
return nil, fmt.Errorf("initializing compat logs: %v", err)
}
+ mountHints, err := newPodMountHints(args.Spec)
+ if err != nil {
+ return nil, fmt.Errorf("creating pod mount hints: %v", err)
+ }
+
eid := execID{cid: args.ID}
l := &Loader{
k: k,
@@ -311,6 +323,7 @@ func New(args Args) (*Loader, error) {
rootProcArgs: procArgs,
sandboxID: args.ID,
processes: map[execID]*execProcess{eid: {}},
+ mountHints: mountHints,
}
// We don't care about child signals; some platforms can generate a
@@ -402,19 +415,12 @@ func (l *Loader) Destroy() {
}
func createPlatform(conf *Config, deviceFile *os.File) (platform.Platform, error) {
- switch conf.Platform {
- case PlatformPtrace:
- log.Infof("Platform: ptrace")
- return ptrace.New()
- case PlatformKVM:
- log.Infof("Platform: kvm")
- if deviceFile == nil {
- return nil, fmt.Errorf("kvm device file must be provided")
- }
- return kvm.New(deviceFile)
- default:
- return nil, fmt.Errorf("invalid platform %v", conf.Platform)
+ p, err := platform.Lookup(conf.Platform)
+ if err != nil {
+ panic(fmt.Sprintf("invalid platform %v: %v", conf.Platform, err))
}
+ log.Infof("Platform: %s", conf.Platform)
+ return p.New(deviceFile)
}
func createMemoryFile() (*pgalloc.MemoryFile, error) {
@@ -435,6 +441,23 @@ func createMemoryFile() (*pgalloc.MemoryFile, error) {
return mf, nil
}
+func (l *Loader) installSeccompFilters() error {
+ if l.conf.DisableSeccomp {
+ filter.Report("syscall filter is DISABLED. Running in less secure mode.")
+ } else {
+ opts := filter.Options{
+ Platform: l.k.Platform,
+ HostNetwork: l.conf.Network == NetworkHost,
+ ProfileEnable: l.conf.ProfileEnable,
+ ControllerFD: l.ctrl.srv.FD(),
+ }
+ if err := filter.Install(opts); err != nil {
+ return fmt.Errorf("installing seccomp filters: %v", err)
+ }
+ }
+ return nil
+}
+
// Run runs the root container.
func (l *Loader) Run() error {
err := l.run()
@@ -470,39 +493,33 @@ func (l *Loader) run() error {
return fmt.Errorf("trying to start deleted container %q", l.sandboxID)
}
- // Finally done with all configuration. Setup filters before user code
- // is loaded.
- if l.conf.DisableSeccomp {
- filter.Report("syscall filter is DISABLED. Running in less secure mode.")
- } else {
- opts := filter.Options{
- Platform: l.k.Platform,
- HostNetwork: l.conf.Network == NetworkHost,
- ProfileEnable: l.conf.ProfileEnable,
- ControllerFD: l.ctrl.srv.FD(),
- }
- if err := filter.Install(opts); err != nil {
- return fmt.Errorf("installing seccomp filters: %v", err)
- }
- }
-
// If we are restoring, we do not want to create a process.
// l.restore is set by the container manager when a restore call is made.
if !l.restore {
+ if l.conf.ProfileEnable {
+ initializePProf()
+ }
+
+ // Finally done with all configuration. Setup filters before user code
+ // is loaded.
+ if err := l.installSeccompFilters(); err != nil {
+ return err
+ }
+
// Create the FD map, which will set stdin, stdout, and stderr. If console
// is true, then ioctl calls will be passed through to the host fd.
ctx := l.rootProcArgs.NewContext(l.k)
- fdm, err := createFDMap(ctx, l.rootProcArgs.Limits, l.console, l.stdioFDs)
+ fdTable, err := createFDTable(ctx, l.console, l.stdioFDs)
if err != nil {
return fmt.Errorf("importing fds: %v", err)
}
// CreateProcess takes a reference on FDMap if successful. We won't need
// ours either way.
- l.rootProcArgs.FDMap = fdm
+ l.rootProcArgs.FDTable = fdTable
// cid for root container can be empty. Only subcontainers need it to set
// the mount location.
- mntr := newContainerMounter(l.spec, "", l.goferFDs, l.k)
+ mntr := newContainerMounter(l.spec, "", l.goferFDs, l.k, l.mountHints)
if err := mntr.setupFS(ctx, l.conf, &l.rootProcArgs, l.rootProcArgs.Credentials); err != nil {
return err
}
@@ -513,19 +530,37 @@ func (l *Loader) run() error {
return err
}
+ // Read /etc/passwd for the user's HOME directory and set the HOME
+ // environment variable as required by POSIX if it is not overridden by
+ // the user.
+ hasHomeEnvv := false
+ for _, envv := range l.rootProcArgs.Envv {
+ if strings.HasPrefix(envv, "HOME=") {
+ hasHomeEnvv = true
+ }
+ }
+ if !hasHomeEnvv {
+ homeDir, err := getExecUserHome(rootCtx, rootMns, uint32(l.rootProcArgs.Credentials.RealKUID))
+ if err != nil {
+ return fmt.Errorf("error reading exec user: %v", err)
+ }
+
+ l.rootProcArgs.Envv = append(l.rootProcArgs.Envv, "HOME="+homeDir)
+ }
+
// Create the root container init task. It will begin running
// when the kernel is started.
if _, _, err := l.k.CreateProcess(l.rootProcArgs); err != nil {
return fmt.Errorf("creating init process: %v", err)
}
- // CreateProcess takes a reference on FDMap if successful.
- l.rootProcArgs.FDMap.DecRef()
+ // CreateProcess takes a reference on FDTable if successful.
+ l.rootProcArgs.FDTable.DecRef()
}
ep.tg = l.k.GlobalInit()
if l.console {
- ttyFile := l.rootProcArgs.FDMap.GetFile(0)
+ ttyFile, _ := l.rootProcArgs.FDTable.Get(0)
defer ttyFile.DecRef()
ep.tty = ttyFile.FileOperations.(*host.TTYFileOperations)
@@ -605,13 +640,13 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file
// Create the FD map, which will set stdin, stdout, and stderr.
ctx := procArgs.NewContext(l.k)
- fdm, err := createFDMap(ctx, procArgs.Limits, false, stdioFDs)
+ fdTable, err := createFDTable(ctx, false, stdioFDs)
if err != nil {
return fmt.Errorf("importing fds: %v", err)
}
- // CreateProcess takes a reference on FDMap if successful. We won't need ours
- // either way.
- procArgs.FDMap = fdm
+ // CreateProcess takes a reference on fdTable if successful. We won't
+ // need ours either way.
+ procArgs.FDTable = fdTable
// Can't take ownership away from os.File. dup them to get a new FDs.
var goferFDs []int
@@ -623,7 +658,7 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file
goferFDs = append(goferFDs, fd)
}
- mntr := newContainerMounter(spec, cid, goferFDs, l.k)
+ mntr := newContainerMounter(spec, cid, goferFDs, l.k, l.mountHints)
if err := mntr.setupFS(ctx, conf, &procArgs, creds); err != nil {
return fmt.Errorf("configuring container FS: %v", err)
}
@@ -640,8 +675,8 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file
}
l.k.StartProcess(tg)
- // CreateProcess takes a reference on FDMap if successful.
- procArgs.FDMap.DecRef()
+ // CreateProcess takes a reference on FDTable if successful.
+ procArgs.FDTable.DecRef()
l.processes[eid].tg = tg
return nil
@@ -805,9 +840,17 @@ func newEmptyNetworkStack(conf *Config, clock tcpip.Clock) (inet.Stack, error) {
// privileges.
Raw: true,
})}
+
+ // Enable SACK Recovery.
if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SACKEnabled(true)); err != nil {
return nil, fmt.Errorf("failed to enable SACK: %v", err)
}
+
+ // Enable Receive Buffer Auto-Tuning.
+ if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(true)); err != nil {
+ return nil, fmt.Errorf("SetTransportProtocolOption failed: %v", err)
+ }
+
return &s, nil
default:
@@ -956,3 +999,8 @@ func (l *Loader) threadGroupFromIDLocked(key execID) (*kernel.ThreadGroup, *host
}
return ep.tg, ep.tty, nil
}
+
+func init() {
+ // TODO(gvisor.dev/issue/365): Make this configurable.
+ refs.SetLeakMode(refs.NoLeakChecking)
+}
diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go
index 6393cb3fb..ff713660d 100644
--- a/runsc/boot/loader_test.go
+++ b/runsc/boot/loader_test.go
@@ -25,18 +25,21 @@ import (
"time"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/control/server"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/p9"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context/contexttest"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/unet"
- "gvisor.googlesource.com/gvisor/runsc/fsgofer"
+ "gvisor.dev/gvisor/pkg/control/server"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/p9"
+ "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/unet"
+ "gvisor.dev/gvisor/runsc/fsgofer"
)
func init() {
log.SetLevel(log.Debug)
rand.Seed(time.Now().UnixNano())
+ if err := fsgofer.OpenProcSelfFD(); err != nil {
+ panic(err)
+ }
}
func testConfig() *Config {
@@ -44,6 +47,7 @@ func testConfig() *Config {
RootDir: "unused_root_dir",
Network: NetworkNone,
DisableSeccomp: true,
+ Platform: "ptrace",
}
}
@@ -404,7 +408,7 @@ func TestCreateMountNamespace(t *testing.T) {
mns = m
ctx.(*contexttest.TestContext).RegisterValue(fs.CtxRoot, mns.Root())
}
- mntr := newContainerMounter(&tc.spec, "", []int{sandEnd}, nil)
+ mntr := newContainerMounter(&tc.spec, "", []int{sandEnd}, nil, &podMountHints{})
if err := mntr.setupRootContainer(ctx, ctx, conf, setMountNS); err != nil {
t.Fatalf("createMountNamespace test case %q failed: %v", tc.name, err)
}
@@ -610,7 +614,7 @@ func TestRestoreEnvironment(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
conf := testConfig()
- mntr := newContainerMounter(tc.spec, "", tc.ioFDs, nil)
+ mntr := newContainerMounter(tc.spec, "", tc.ioFDs, nil, &podMountHints{})
actualRenv, err := mntr.createRestoreEnvironment(conf)
if !tc.errorExpected && err != nil {
t.Fatalf("could not create restore environment for test:%s", tc.name)
diff --git a/runsc/boot/network.go b/runsc/boot/network.go
index 82c259f47..d3d98243d 100644
--- a/runsc/boot/network.go
+++ b/runsc/boot/network.go
@@ -19,16 +19,16 @@ import (
"net"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/fdbased"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/loopback"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/sniffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/arp"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv4"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/network/ipv6"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
- "gvisor.googlesource.com/gvisor/pkg/urpc"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/link/fdbased"
+ "gvisor.dev/gvisor/pkg/tcpip/link/loopback"
+ "gvisor.dev/gvisor/pkg/tcpip/link/sniffer"
+ "gvisor.dev/gvisor/pkg/tcpip/network/arp"
+ "gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
+ "gvisor.dev/gvisor/pkg/tcpip/network/ipv6"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/urpc"
)
// Network exposes methods that can be used to configure a network stack.
@@ -56,7 +56,7 @@ type FDBasedLink struct {
Addresses []net.IP
Routes []Route
GSOMaxSize uint32
- LinkAddress []byte
+ LinkAddress net.HardwareAddr
// NumChannels controls how many underlying FD's are to be used to
// create this endpoint.
diff --git a/runsc/boot/platforms/BUILD b/runsc/boot/platforms/BUILD
new file mode 100644
index 000000000..03391cdca
--- /dev/null
+++ b/runsc/boot/platforms/BUILD
@@ -0,0 +1,16 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+
+package(licenses = ["notice"])
+
+go_library(
+ name = "platforms",
+ srcs = ["platforms.go"],
+ importpath = "gvisor.dev/gvisor/runsc/boot/platforms",
+ visibility = [
+ "//runsc:__subpackages__",
+ ],
+ deps = [
+ "//pkg/sentry/platform/kvm",
+ "//pkg/sentry/platform/ptrace",
+ ],
+)
diff --git a/runsc/boot/platforms/platforms.go b/runsc/boot/platforms/platforms.go
new file mode 100644
index 000000000..056b46ad5
--- /dev/null
+++ b/runsc/boot/platforms/platforms.go
@@ -0,0 +1,30 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package platforms imports all available platform packages.
+package platforms
+
+import (
+ // Import platforms that runsc might use.
+ _ "gvisor.dev/gvisor/pkg/sentry/platform/kvm"
+ _ "gvisor.dev/gvisor/pkg/sentry/platform/ptrace"
+)
+
+const (
+ // Ptrace runs the sandbox with the ptrace platform.
+ Ptrace = "ptrace"
+
+ // KVM runs the sandbox with the KVM platform.
+ KVM = "kvm"
+)
diff --git a/pkg/sentry/kernel/kdefs/kdefs.go b/runsc/boot/pprof.go
index 304da2032..463362f02 100644
--- a/pkg/sentry/kernel/kdefs/kdefs.go
+++ b/runsc/boot/pprof.go
@@ -1,4 +1,4 @@
-// Copyright 2018 The gVisor Authors.
+// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -12,9 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-// Package kdefs defines common kernel definitions.
-//
-package kdefs
+package boot
-// FD is a File Descriptor.
-type FD int32
+func initializePProf() {
+}
diff --git a/runsc/boot/strace.go b/runsc/boot/strace.go
index 19c7f8fbd..fbfd3b07c 100644
--- a/runsc/boot/strace.go
+++ b/runsc/boot/strace.go
@@ -15,7 +15,7 @@
package boot
import (
- "gvisor.googlesource.com/gvisor/pkg/sentry/strace"
+ "gvisor.dev/gvisor/pkg/sentry/strace"
)
func enableStrace(conf *Config) error {
diff --git a/runsc/boot/user.go b/runsc/boot/user.go
new file mode 100644
index 000000000..d1d423a5c
--- /dev/null
+++ b/runsc/boot/user.go
@@ -0,0 +1,146 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package boot
+
+import (
+ "bufio"
+ "io"
+ "strconv"
+ "strings"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+)
+
+type fileReader struct {
+ // Ctx is the context for the file reader.
+ Ctx context.Context
+
+ // File is the file to read from.
+ File *fs.File
+}
+
+// Read implements io.Reader.Read.
+func (r *fileReader) Read(buf []byte) (int, error) {
+ n, err := r.File.Readv(r.Ctx, usermem.BytesIOSequence(buf))
+ return int(n), err
+}
+
+// getExecUserHome returns the home directory of the executing user read from
+// /etc/passwd as read from the container filesystem.
+func getExecUserHome(ctx context.Context, rootMns *fs.MountNamespace, uid uint32) (string, error) {
+ // The default user home directory to return if no user matching the user
+ // if found in the /etc/passwd found in the image.
+ const defaultHome = "/"
+
+ // Open the /etc/passwd file from the dirent via the root mount namespace.
+ mnsRoot := rootMns.Root()
+ maxTraversals := uint(linux.MaxSymlinkTraversals)
+ dirent, err := rootMns.FindInode(ctx, mnsRoot, nil, "/etc/passwd", &maxTraversals)
+ if err != nil {
+ // NOTE: Ignore errors opening the passwd file. If the passwd file
+ // doesn't exist we will return the default home directory.
+ return defaultHome, nil
+ }
+ defer dirent.DecRef()
+
+ // Check read permissions on the file.
+ if err := dirent.Inode.CheckPermission(ctx, fs.PermMask{Read: true}); err != nil {
+ // NOTE: Ignore permissions errors here and return default root dir.
+ return defaultHome, nil
+ }
+
+ // Only open regular files. We don't open other files like named pipes as
+ // they may block and might present some attack surface to the container.
+ // Note that runc does not seem to do this kind of checking.
+ if !fs.IsRegular(dirent.Inode.StableAttr) {
+ return defaultHome, nil
+ }
+
+ f, err := dirent.Inode.GetFile(ctx, dirent, fs.FileFlags{Read: true, Directory: false})
+ if err != nil {
+ return "", err
+ }
+ defer f.DecRef()
+
+ r := &fileReader{
+ Ctx: ctx,
+ File: f,
+ }
+
+ homeDir, err := findHomeInPasswd(uid, r, defaultHome)
+ if err != nil {
+ return "", err
+ }
+
+ return homeDir, nil
+}
+
+// findHomeInPasswd parses a passwd file and returns the given user's home
+// directory. This function does it's best to replicate the runc's behavior.
+func findHomeInPasswd(uid uint32, passwd io.Reader, defaultHome string) (string, error) {
+ s := bufio.NewScanner(passwd)
+
+ for s.Scan() {
+ if err := s.Err(); err != nil {
+ return "", err
+ }
+
+ line := strings.TrimSpace(s.Text())
+ if line == "" {
+ continue
+ }
+
+ // Pull out part of passwd entry. Loosely parse the passwd entry as some
+ // passwd files could be poorly written and for compatibility with runc.
+ //
+ // Per 'man 5 passwd'
+ // /etc/passwd contains one line for each user account, with seven
+ // fields delimited by colons (“:”). These fields are:
+ //
+ // - login name
+ // - optional encrypted password
+ // - numerical user ID
+ // - numerical group ID
+ // - user name or comment field
+ // - user home directory
+ // - optional user command interpreter
+ parts := strings.Split(line, ":")
+
+ found := false
+ homeDir := ""
+ for i, p := range parts {
+ switch i {
+ case 2:
+ parsedUID, err := strconv.ParseUint(p, 10, 32)
+ if err == nil && parsedUID == uint64(uid) {
+ found = true
+ }
+ case 5:
+ homeDir = p
+ }
+ }
+ if found {
+ // NOTE: If the uid is present but the home directory is not
+ // present in the /etc/passwd entry we return an empty string. This
+ // is, for better or worse, what runc does.
+ return homeDir, nil
+ }
+ }
+
+ return defaultHome, nil
+}
diff --git a/runsc/boot/user_test.go b/runsc/boot/user_test.go
new file mode 100644
index 000000000..834003430
--- /dev/null
+++ b/runsc/boot/user_test.go
@@ -0,0 +1,253 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package boot
+
+import (
+ "io/ioutil"
+ "os"
+ "path/filepath"
+ "strings"
+ "syscall"
+ "testing"
+
+ specs "github.com/opencontainers/runtime-spec/specs-go"
+ "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+)
+
+func setupTempDir() (string, error) {
+ tmpDir, err := ioutil.TempDir(os.TempDir(), "exec-user-test")
+ if err != nil {
+ return "", err
+ }
+ return tmpDir, nil
+}
+
+func setupPasswd(contents string, perms os.FileMode) func() (string, error) {
+ return func() (string, error) {
+ tmpDir, err := setupTempDir()
+ if err != nil {
+ return "", err
+ }
+
+ if err := os.Mkdir(filepath.Join(tmpDir, "etc"), 0777); err != nil {
+ return "", err
+ }
+
+ f, err := os.Create(filepath.Join(tmpDir, "etc", "passwd"))
+ if err != nil {
+ return "", err
+ }
+ defer f.Close()
+
+ _, err = f.WriteString(contents)
+ if err != nil {
+ return "", err
+ }
+
+ err = f.Chmod(perms)
+ if err != nil {
+ return "", err
+ }
+ return tmpDir, nil
+ }
+}
+
+// TestGetExecUserHome tests the getExecUserHome function.
+func TestGetExecUserHome(t *testing.T) {
+ tests := map[string]struct {
+ uid uint32
+ createRoot func() (string, error)
+ expected string
+ }{
+ "success": {
+ uid: 1000,
+ createRoot: setupPasswd("adin::1000:1111::/home/adin:/bin/sh", 0666),
+ expected: "/home/adin",
+ },
+ "no_passwd": {
+ uid: 1000,
+ createRoot: setupTempDir,
+ expected: "/",
+ },
+ "no_perms": {
+ uid: 1000,
+ createRoot: setupPasswd("adin::1000:1111::/home/adin:/bin/sh", 0000),
+ expected: "/",
+ },
+ "directory": {
+ uid: 1000,
+ createRoot: func() (string, error) {
+ tmpDir, err := setupTempDir()
+ if err != nil {
+ return "", err
+ }
+
+ if err := os.Mkdir(filepath.Join(tmpDir, "etc"), 0777); err != nil {
+ return "", err
+ }
+
+ if err := syscall.Mkdir(filepath.Join(tmpDir, "etc", "passwd"), 0666); err != nil {
+ return "", err
+ }
+
+ return tmpDir, nil
+ },
+ expected: "/",
+ },
+ // Currently we don't allow named pipes.
+ "named_pipe": {
+ uid: 1000,
+ createRoot: func() (string, error) {
+ tmpDir, err := setupTempDir()
+ if err != nil {
+ return "", err
+ }
+
+ if err := os.Mkdir(filepath.Join(tmpDir, "etc"), 0777); err != nil {
+ return "", err
+ }
+
+ if err := syscall.Mkfifo(filepath.Join(tmpDir, "etc", "passwd"), 0666); err != nil {
+ return "", err
+ }
+
+ return tmpDir, nil
+ },
+ expected: "/",
+ },
+ }
+
+ for name, tc := range tests {
+ t.Run(name, func(t *testing.T) {
+ tmpDir, err := tc.createRoot()
+ if err != nil {
+ t.Fatalf("failed to create root dir: %v", err)
+ }
+
+ sandEnd, cleanup, err := startGofer(tmpDir)
+ if err != nil {
+ t.Fatalf("failed to create gofer: %v", err)
+ }
+ defer cleanup()
+
+ ctx := contexttest.Context(t)
+ conf := &Config{
+ RootDir: "unused_root_dir",
+ Network: NetworkNone,
+ DisableSeccomp: true,
+ }
+
+ spec := &specs.Spec{
+ Root: &specs.Root{
+ Path: tmpDir,
+ Readonly: true,
+ },
+ // Add /proc mount as tmpfs to avoid needing a kernel.
+ Mounts: []specs.Mount{
+ {
+ Destination: "/proc",
+ Type: "tmpfs",
+ },
+ },
+ }
+
+ var mns *fs.MountNamespace
+ setMountNS := func(m *fs.MountNamespace) {
+ mns = m
+ ctx.(*contexttest.TestContext).RegisterValue(fs.CtxRoot, mns.Root())
+ }
+ mntr := newContainerMounter(spec, "", []int{sandEnd}, nil, &podMountHints{})
+ if err := mntr.setupRootContainer(ctx, ctx, conf, setMountNS); err != nil {
+ t.Fatalf("failed to create mount namespace: %v", err)
+ }
+
+ got, err := getExecUserHome(ctx, mns, tc.uid)
+ if err != nil {
+ t.Fatalf("failed to get user home: %v", err)
+ }
+
+ if got != tc.expected {
+ t.Fatalf("expected %v, got: %v", tc.expected, got)
+ }
+ })
+ }
+}
+
+// TestFindHomeInPasswd tests the findHomeInPasswd function's passwd file parsing.
+func TestFindHomeInPasswd(t *testing.T) {
+ tests := map[string]struct {
+ uid uint32
+ passwd string
+ expected string
+ def string
+ }{
+ "empty": {
+ uid: 1000,
+ passwd: "",
+ expected: "/",
+ def: "/",
+ },
+ "whitespace": {
+ uid: 1000,
+ passwd: " ",
+ expected: "/",
+ def: "/",
+ },
+ "full": {
+ uid: 1000,
+ passwd: "adin::1000:1111::/home/adin:/bin/sh",
+ expected: "/home/adin",
+ def: "/",
+ },
+ // For better or worse, this is how runc works.
+ "partial": {
+ uid: 1000,
+ passwd: "adin::1000:1111:",
+ expected: "",
+ def: "/",
+ },
+ "multiple": {
+ uid: 1001,
+ passwd: "adin::1000:1111::/home/adin:/bin/sh\nian::1001:1111::/home/ian:/bin/sh",
+ expected: "/home/ian",
+ def: "/",
+ },
+ "duplicate": {
+ uid: 1000,
+ passwd: "adin::1000:1111::/home/adin:/bin/sh\nian::1000:1111::/home/ian:/bin/sh",
+ expected: "/home/adin",
+ def: "/",
+ },
+ "empty_lines": {
+ uid: 1001,
+ passwd: "adin::1000:1111::/home/adin:/bin/sh\n\n\nian::1001:1111::/home/ian:/bin/sh",
+ expected: "/home/ian",
+ def: "/",
+ },
+ }
+
+ for name, tc := range tests {
+ t.Run(name, func(t *testing.T) {
+ got, err := findHomeInPasswd(tc.uid, strings.NewReader(tc.passwd), tc.def)
+ if err != nil {
+ t.Fatalf("error parsing passwd: %v", err)
+ }
+ if tc.expected != got {
+ t.Fatalf("expected %v, got: %v", tc.expected, got)
+ }
+ })
+ }
+}
diff --git a/runsc/cgroup/BUILD b/runsc/cgroup/BUILD
index 620d33a19..ab2387614 100644
--- a/runsc/cgroup/BUILD
+++ b/runsc/cgroup/BUILD
@@ -5,7 +5,7 @@ package(licenses = ["notice"])
go_library(
name = "cgroup",
srcs = ["cgroup.go"],
- importpath = "gvisor.googlesource.com/gvisor/runsc/cgroup",
+ importpath = "gvisor.dev/gvisor/runsc/cgroup",
visibility = [
"//runsc:__subpackages__",
],
diff --git a/runsc/cgroup/cgroup.go b/runsc/cgroup/cgroup.go
index 7431b17d6..ab3a25b9b 100644
--- a/runsc/cgroup/cgroup.go
+++ b/runsc/cgroup/cgroup.go
@@ -30,8 +30,8 @@ import (
"github.com/cenkalti/backoff"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/runsc/specutils"
)
const (
diff --git a/runsc/cmd/BUILD b/runsc/cmd/BUILD
index df6af0ced..5223b9972 100644
--- a/runsc/cmd/BUILD
+++ b/runsc/cmd/BUILD
@@ -33,7 +33,7 @@ go_library(
"syscalls.go",
"wait.go",
],
- importpath = "gvisor.googlesource.com/gvisor/runsc/cmd",
+ importpath = "gvisor.dev/gvisor/runsc/cmd",
visibility = [
"//runsc:__subpackages__",
],
@@ -46,6 +46,7 @@ go_library(
"//pkg/unet",
"//pkg/urpc",
"//runsc/boot",
+ "//runsc/boot/platforms",
"//runsc/console",
"//runsc/container",
"//runsc/fsgofer",
diff --git a/runsc/cmd/boot.go b/runsc/cmd/boot.go
index 3a547d4aa..b40fded5b 100644
--- a/runsc/cmd/boot.go
+++ b/runsc/cmd/boot.go
@@ -24,9 +24,10 @@ import (
"flag"
"github.com/google/subcommands"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/boot/platforms"
+ "gvisor.dev/gvisor/runsc/specutils"
)
// Boot implements subcommands.Command for the "boot" command which starts a
@@ -130,6 +131,8 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
// Ensure that if there is a panic, all goroutine stacks are printed.
debug.SetTraceback("all")
+ conf := args[0].(*boot.Config)
+
if b.setUpRoot {
if err := setUpChroot(b.pidns); err != nil {
Fatalf("error setting up chroot: %v", err)
@@ -143,14 +146,16 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
args = append(args, arg)
}
}
- // Note that we've already read the spec from the spec FD, and
- // we will read it again after the exec call. This works
- // because the ReadSpecFromFile function seeks to the beginning
- // of the file before reading.
- if err := callSelfAsNobody(args); err != nil {
- Fatalf("%v", err)
+ if !conf.Rootless {
+ // Note that we've already read the spec from the spec FD, and
+ // we will read it again after the exec call. This works
+ // because the ReadSpecFromFile function seeks to the beginning
+ // of the file before reading.
+ if err := callSelfAsNobody(args); err != nil {
+ Fatalf("%v", err)
+ }
+ panic("callSelfAsNobody must never return success")
}
- panic("callSelfAsNobody must never return success")
}
}
@@ -163,15 +168,12 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
}
specutils.LogSpec(spec)
- conf := args[0].(*boot.Config)
- waitStatus := args[1].(*syscall.WaitStatus)
-
if b.applyCaps {
caps := spec.Process.Capabilities
if caps == nil {
caps = &specs.LinuxCapabilities{}
}
- if conf.Platform == boot.PlatformPtrace {
+ if conf.Platform == platforms.Ptrace {
// Ptrace platform requires extra capabilities.
const c = "CAP_SYS_PTRACE"
caps.Bounding = append(caps.Bounding, c)
@@ -251,6 +253,7 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
ws := l.WaitExit()
log.Infof("application exiting with %+v", ws)
+ waitStatus := args[1].(*syscall.WaitStatus)
*waitStatus = syscall.WaitStatus(ws.Status())
l.Destroy()
return subcommands.ExitSuccess
diff --git a/runsc/cmd/capability.go b/runsc/cmd/capability.go
index 312e5b471..abfbb7cfc 100644
--- a/runsc/cmd/capability.go
+++ b/runsc/cmd/capability.go
@@ -19,7 +19,7 @@ import (
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/syndtr/gocapability/capability"
- "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/log"
)
var allCapTypes = []capability.CapType{
diff --git a/runsc/cmd/capability_test.go b/runsc/cmd/capability_test.go
index ee74d33d8..3ae25a257 100644
--- a/runsc/cmd/capability_test.go
+++ b/runsc/cmd/capability_test.go
@@ -21,11 +21,11 @@ import (
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/syndtr/gocapability/capability"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
- "gvisor.googlesource.com/gvisor/runsc/test/testutil"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/runsc/test/testutil"
)
func init() {
@@ -97,7 +97,12 @@ func TestCapabilities(t *testing.T) {
defer os.RemoveAll(bundleDir)
// Create and start the container.
- c, err := container.Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args := container.Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ c, err := container.New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -116,6 +121,6 @@ func TestCapabilities(t *testing.T) {
}
func TestMain(m *testing.M) {
- testutil.RunAsRoot()
+ specutils.MaybeRunAsRoot()
os.Exit(m.Run())
}
diff --git a/runsc/cmd/checkpoint.go b/runsc/cmd/checkpoint.go
index 96d3c3378..d8b3a8573 100644
--- a/runsc/cmd/checkpoint.go
+++ b/runsc/cmd/checkpoint.go
@@ -22,10 +22,10 @@ import (
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/specutils"
)
// File containing the container's saved image/state within the given image-path's directory.
@@ -133,7 +133,12 @@ func (c *Checkpoint) Execute(_ context.Context, f *flag.FlagSet, args ...interfa
Fatalf("destroying container: %v", err)
}
- cont, err = container.Create(id, spec, conf, bundleDir, "", "", "")
+ contArgs := container.Args{
+ ID: id,
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ cont, err = container.New(conf, contArgs)
if err != nil {
Fatalf("restoring container: %v", err)
}
diff --git a/runsc/cmd/chroot.go b/runsc/cmd/chroot.go
index 1a774db04..b5a0ce17d 100644
--- a/runsc/cmd/chroot.go
+++ b/runsc/cmd/chroot.go
@@ -20,8 +20,8 @@ import (
"path/filepath"
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/runsc/specutils"
)
// mountInChroot creates the destination mount point in the given chroot and
diff --git a/runsc/cmd/cmd.go b/runsc/cmd/cmd.go
index 5b4cc4a39..f1a4887ef 100644
--- a/runsc/cmd/cmd.go
+++ b/runsc/cmd/cmd.go
@@ -22,8 +22,8 @@ import (
"syscall"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/runsc/specutils"
)
// intFlags can be used with int flags that appear multiple times.
diff --git a/runsc/cmd/create.go b/runsc/cmd/create.go
index 8bf9b7dcf..a4e3071b3 100644
--- a/runsc/cmd/create.go
+++ b/runsc/cmd/create.go
@@ -18,9 +18,9 @@ import (
"context"
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/specutils"
)
// Create implements subcommands.Command for the "create" command.
@@ -82,21 +82,33 @@ func (c *Create) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}
id := f.Arg(0)
conf := args[0].(*boot.Config)
+ if conf.Rootless {
+ return Errorf("Rootless mode not supported with %q", c.Name())
+ }
+
bundleDir := c.bundleDir
if bundleDir == "" {
bundleDir = getwdOrDie()
}
spec, err := specutils.ReadSpec(bundleDir)
if err != nil {
- Fatalf("reading spec: %v", err)
+ return Errorf("reading spec: %v", err)
}
specutils.LogSpec(spec)
// Create the container. A new sandbox will be created for the
// container unless the metadata specifies that it should be run in an
// existing container.
- if _, err := container.Create(id, spec, conf, bundleDir, c.consoleSocket, c.pidFile, c.userLog); err != nil {
- Fatalf("creating container: %v", err)
+ contArgs := container.Args{
+ ID: id,
+ Spec: spec,
+ BundleDir: bundleDir,
+ ConsoleSocket: c.consoleSocket,
+ PIDFile: c.pidFile,
+ UserLog: c.userLog,
+ }
+ if _, err := container.New(conf, contArgs); err != nil {
+ return Errorf("creating container: %v", err)
}
return subcommands.ExitSuccess
}
diff --git a/runsc/cmd/debug.go b/runsc/cmd/debug.go
index 27eb51172..7313e473f 100644
--- a/runsc/cmd/debug.go
+++ b/runsc/cmd/debug.go
@@ -17,14 +17,17 @@ package cmd
import (
"context"
"os"
+ "strconv"
+ "strings"
"syscall"
"time"
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
)
// Debug implements subcommands.Command for the "debug" command.
@@ -36,6 +39,9 @@ type Debug struct {
profileCPU string
profileDelay int
trace string
+ strace string
+ logLevel string
+ logPackets string
}
// Name implements subcommands.Command.
@@ -62,6 +68,9 @@ func (d *Debug) SetFlags(f *flag.FlagSet) {
f.IntVar(&d.profileDelay, "profile-delay", 5, "amount of time to wait before stoping CPU profile")
f.StringVar(&d.trace, "trace", "", "writes an execution trace to the given file.")
f.IntVar(&d.signal, "signal", -1, "sends signal to the sandbox")
+ f.StringVar(&d.strace, "strace", "", `A comma separated list of syscalls to trace. "all" enables all traces, "off" disables all`)
+ f.StringVar(&d.logLevel, "log-level", "", "The log level to set: warning (0), info (1), or debug (2).")
+ f.StringVar(&d.logPackets, "log-packets", "", "A boolean value to enable or disable packet logging: true or false.")
}
// Execute implements subcommands.Command.Execute.
@@ -78,7 +87,7 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
var err error
c, err = container.Load(conf.RootDir, f.Arg(0))
if err != nil {
- Fatalf("loading container %q: %v", f.Arg(0), err)
+ return Errorf("loading container %q: %v", f.Arg(0), err)
}
} else {
if f.NArg() != 0 {
@@ -88,12 +97,12 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
// Go over all sandboxes and find the one that matches PID.
ids, err := container.List(conf.RootDir)
if err != nil {
- Fatalf("listing containers: %v", err)
+ return Errorf("listing containers: %v", err)
}
for _, id := range ids {
candidate, err := container.Load(conf.RootDir, id)
if err != nil {
- Fatalf("loading container %q: %v", id, err)
+ return Errorf("loading container %q: %v", id, err)
}
if candidate.SandboxPid() == d.pid {
c = candidate
@@ -101,38 +110,38 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
}
}
if c == nil {
- Fatalf("container with PID %d not found", d.pid)
+ return Errorf("container with PID %d not found", d.pid)
}
}
if c.Sandbox == nil || !c.Sandbox.IsRunning() {
- Fatalf("container sandbox is not running")
+ return Errorf("container sandbox is not running")
}
log.Infof("Found sandbox %q, PID: %d", c.Sandbox.ID, c.Sandbox.Pid)
if d.signal > 0 {
log.Infof("Sending signal %d to process: %d", d.signal, c.Sandbox.Pid)
if err := syscall.Kill(c.Sandbox.Pid, syscall.Signal(d.signal)); err != nil {
- Fatalf("failed to send signal %d to processs %d", d.signal, c.Sandbox.Pid)
+ return Errorf("failed to send signal %d to processs %d", d.signal, c.Sandbox.Pid)
}
}
if d.stacks {
log.Infof("Retrieving sandbox stacks")
stacks, err := c.Sandbox.Stacks()
if err != nil {
- Fatalf("retrieving stacks: %v", err)
+ return Errorf("retrieving stacks: %v", err)
}
log.Infof(" *** Stack dump ***\n%s", stacks)
}
if d.profileHeap != "" {
f, err := os.Create(d.profileHeap)
if err != nil {
- Fatalf(err.Error())
+ return Errorf(err.Error())
}
defer f.Close()
if err := c.Sandbox.HeapProfile(f); err != nil {
- Fatalf(err.Error())
+ return Errorf(err.Error())
}
log.Infof("Heap profile written to %q", d.profileHeap)
}
@@ -142,7 +151,7 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
delay = true
f, err := os.Create(d.profileCPU)
if err != nil {
- Fatalf(err.Error())
+ return Errorf(err.Error())
}
defer func() {
f.Close()
@@ -152,7 +161,7 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
log.Infof("CPU profile written to %q", d.profileCPU)
}()
if err := c.Sandbox.StartCPUProfile(f); err != nil {
- Fatalf(err.Error())
+ return Errorf(err.Error())
}
log.Infof("CPU profile started for %d sec, writing to %q", d.profileDelay, d.profileCPU)
}
@@ -160,7 +169,7 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
delay = true
f, err := os.Create(d.trace)
if err != nil {
- Fatalf(err.Error())
+ return Errorf(err.Error())
}
defer func() {
f.Close()
@@ -170,15 +179,71 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
log.Infof("Trace written to %q", d.trace)
}()
if err := c.Sandbox.StartTrace(f); err != nil {
- Fatalf(err.Error())
+ return Errorf(err.Error())
}
log.Infof("Tracing started for %d sec, writing to %q", d.profileDelay, d.trace)
}
+ if d.strace != "" || len(d.logLevel) != 0 || len(d.logPackets) != 0 {
+ args := control.LoggingArgs{}
+ switch strings.ToLower(d.strace) {
+ case "":
+ // strace not set, nothing to do here.
+
+ case "off":
+ log.Infof("Disabling strace")
+ args.SetStrace = true
+
+ case "all":
+ log.Infof("Enabling all straces")
+ args.SetStrace = true
+ args.EnableStrace = true
+
+ default:
+ log.Infof("Enabling strace for syscalls: %s", d.strace)
+ args.SetStrace = true
+ args.EnableStrace = true
+ args.StraceWhitelist = strings.Split(d.strace, ",")
+ }
+
+ if len(d.logLevel) != 0 {
+ args.SetLevel = true
+ switch strings.ToLower(d.logLevel) {
+ case "warning", "0":
+ args.Level = log.Warning
+ case "info", "1":
+ args.Level = log.Info
+ case "debug", "2":
+ args.Level = log.Debug
+ default:
+ return Errorf("invalid log level %q", d.logLevel)
+ }
+ log.Infof("Setting log level %v", args.Level)
+ }
+
+ if len(d.logPackets) != 0 {
+ args.SetLogPackets = true
+ lp, err := strconv.ParseBool(d.logPackets)
+ if err != nil {
+ return Errorf("invalid value for log_packets %q", d.logPackets)
+ }
+ args.LogPackets = lp
+ if args.LogPackets {
+ log.Infof("Enabling packet logging")
+ } else {
+ log.Infof("Disabling packet logging")
+ }
+ }
+
+ if err := c.Sandbox.ChangeLogging(args); err != nil {
+ return Errorf(err.Error())
+ }
+ log.Infof("Logging options changed")
+ }
+
if delay {
time.Sleep(time.Duration(d.profileDelay) * time.Second)
-
}
return subcommands.ExitSuccess
diff --git a/runsc/cmd/delete.go b/runsc/cmd/delete.go
index 9039723e9..30d8164b1 100644
--- a/runsc/cmd/delete.go
+++ b/runsc/cmd/delete.go
@@ -21,9 +21,9 @@ import (
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
)
// Delete implements subcommands.Command for the "delete" command.
diff --git a/runsc/cmd/delete_test.go b/runsc/cmd/delete_test.go
index 45fc91016..cb59516a3 100644
--- a/runsc/cmd/delete_test.go
+++ b/runsc/cmd/delete_test.go
@@ -18,7 +18,7 @@ import (
"io/ioutil"
"testing"
- "gvisor.googlesource.com/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/boot"
)
func TestNotFound(t *testing.T) {
diff --git a/runsc/cmd/do.go b/runsc/cmd/do.go
index 8ea59046c..9a8a49054 100644
--- a/runsc/cmd/do.go
+++ b/runsc/cmd/do.go
@@ -30,19 +30,19 @@ import (
"flag"
"github.com/google/subcommands"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/specutils"
)
// Do implements subcommands.Command for the "do" command. It sets up a simple
// sandbox and executes the command inside it. See Usage() for more details.
type Do struct {
- root string
- cwd string
- ip string
- networkNamespace bool
+ root string
+ cwd string
+ ip string
+ quiet bool
}
// Name implements subcommands.Command.Name.
@@ -72,7 +72,7 @@ func (c *Do) SetFlags(f *flag.FlagSet) {
f.StringVar(&c.root, "root", "/", `path to the root directory, defaults to "/"`)
f.StringVar(&c.cwd, "cwd", ".", "path to the current directory, defaults to the current directory")
f.StringVar(&c.ip, "ip", "192.168.10.2", "IPv4 address for the sandbox")
- f.BoolVar(&c.networkNamespace, "netns", true, "run in a new network namespace")
+ f.BoolVar(&c.quiet, "quiet", false, "suppress runsc messages to stdout. Application output is still sent to stdout and stderr")
}
// Execute implements subcommands.Command.Execute.
@@ -85,15 +85,21 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su
conf := args[0].(*boot.Config)
waitStatus := args[1].(*syscall.WaitStatus)
- // Map the entire host file system, but make it readonly with a writable
- // overlay on top (ignore --overlay option).
- conf.Overlay = true
+ if conf.Rootless {
+ if err := specutils.MaybeRunAsRoot(); err != nil {
+ return Errorf("Error executing inside namespace: %v", err)
+ }
+ // Execution will continue here if no more capabilities are needed...
+ }
hostname, err := os.Hostname()
if err != nil {
return Errorf("Error to retrieve hostname: %v", err)
}
+ // Map the entire host file system, but make it readonly with a writable
+ // overlay on top (ignore --overlay option).
+ conf.Overlay = true
absRoot, err := resolvePath(c.root)
if err != nil {
return Errorf("Error resolving root: %v", err)
@@ -119,11 +125,22 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su
specutils.LogSpec(spec)
cid := fmt.Sprintf("runsc-%06d", rand.Int31n(1000000))
- if !c.networkNamespace {
- if conf.Network != boot.NetworkHost {
- Fatalf("The current network namespace can be used only if --network=host is set", nil)
+ if conf.Network == boot.NetworkNone {
+ netns := specs.LinuxNamespace{
+ Type: specs.NetworkNamespace,
}
- } else if conf.Network != boot.NetworkNone {
+ if spec.Linux != nil {
+ panic("spec.Linux is not nil")
+ }
+ spec.Linux = &specs.Linux{Namespaces: []specs.LinuxNamespace{netns}}
+
+ } else if conf.Rootless {
+ if conf.Network == boot.NetworkSandbox {
+ c.notifyUser("*** Warning: using host network due to --rootless ***")
+ conf.Network = boot.NetworkHost
+ }
+
+ } else {
clean, err := c.setupNet(cid, spec)
if err != nil {
return Errorf("Error setting up network: %v", err)
@@ -149,7 +166,13 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su
return Errorf("Error write spec: %v", err)
}
- ws, err := container.Run(cid, spec, conf, tmpDir, "", "", "", false)
+ runArgs := container.Args{
+ ID: cid,
+ Spec: spec,
+ BundleDir: tmpDir,
+ Attached: true,
+ }
+ ws, err := container.Run(conf, runArgs)
if err != nil {
return Errorf("running container: %v", err)
}
@@ -158,6 +181,13 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su
return subcommands.ExitSuccess
}
+func (c *Do) notifyUser(format string, v ...interface{}) {
+ if !c.quiet {
+ fmt.Printf(format+"\n", v...)
+ }
+ log.Warningf(format, v...)
+}
+
func resolvePath(path string) (string, error) {
var err error
path, err = filepath.Abs(path)
diff --git a/runsc/cmd/error.go b/runsc/cmd/error.go
index 700b19f14..3585b5448 100644
--- a/runsc/cmd/error.go
+++ b/runsc/cmd/error.go
@@ -22,7 +22,7 @@ import (
"time"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/log"
)
// ErrorLogger is where error messages should be written to. These messages are
diff --git a/runsc/cmd/events.go b/runsc/cmd/events.go
index c6bc8fc3a..3972e9224 100644
--- a/runsc/cmd/events.go
+++ b/runsc/cmd/events.go
@@ -22,9 +22,9 @@ import (
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
)
// Events implements subcommands.Command for the "events" command.
diff --git a/runsc/cmd/exec.go b/runsc/cmd/exec.go
index 0eeaaadba..e817eff77 100644
--- a/runsc/cmd/exec.go
+++ b/runsc/cmd/exec.go
@@ -30,14 +30,14 @@ import (
"flag"
"github.com/google/subcommands"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/control"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/urpc"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/console"
- "gvisor.googlesource.com/gvisor/runsc/container"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/urpc"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/console"
+ "gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/specutils"
)
// Exec implements subcommands.Command for the "exec" command.
@@ -235,7 +235,11 @@ func (ex *Exec) execChildAndWait(waitStatus *syscall.WaitStatus) subcommands.Exi
cmd.SysProcAttr = &syscall.SysProcAttr{
Setsid: true,
Setctty: true,
- Ctty: int(tty.Fd()),
+ // The Ctty FD must be the FD in the child process's FD
+ // table. Since we set cmd.Stdin/Stdout/Stderr to the
+ // tty FD, we can use any of 0, 1, or 2 here.
+ // See https://github.com/golang/go/issues/29458.
+ Ctty: 0,
}
}
diff --git a/runsc/cmd/exec_test.go b/runsc/cmd/exec_test.go
index 6f0f258c0..eb38a431f 100644
--- a/runsc/cmd/exec_test.go
+++ b/runsc/cmd/exec_test.go
@@ -21,10 +21,10 @@ import (
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/sentry/control"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/urpc"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/urpc"
)
func TestUser(t *testing.T) {
diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go
index bccb29397..9faabf494 100644
--- a/runsc/cmd/gofer.go
+++ b/runsc/cmd/gofer.go
@@ -27,13 +27,13 @@ import (
"flag"
"github.com/google/subcommands"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/p9"
- "gvisor.googlesource.com/gvisor/pkg/unet"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/fsgofer"
- "gvisor.googlesource.com/gvisor/runsc/fsgofer/filter"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/p9"
+ "gvisor.dev/gvisor/pkg/unet"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/fsgofer"
+ "gvisor.dev/gvisor/runsc/fsgofer/filter"
+ "gvisor.dev/gvisor/runsc/specutils"
)
var caps = []string{
@@ -152,6 +152,10 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
// modes exactly as sent by the sandbox, which will have applied its own umask.
syscall.Umask(0)
+ if err := fsgofer.OpenProcSelfFD(); err != nil {
+ Fatalf("failed to open /proc/self/fd: %v", err)
+ }
+
if err := syscall.Chroot(root); err != nil {
Fatalf("failed to chroot to %q: %v", root, err)
}
diff --git a/runsc/cmd/kill.go b/runsc/cmd/kill.go
index aed5f3291..6c1f197a6 100644
--- a/runsc/cmd/kill.go
+++ b/runsc/cmd/kill.go
@@ -24,8 +24,8 @@ import (
"flag"
"github.com/google/subcommands"
"golang.org/x/sys/unix"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
)
// Kill implements subcommands.Command for the "kill" command.
diff --git a/runsc/cmd/list.go b/runsc/cmd/list.go
index 1f5ca2473..dd2d99a6b 100644
--- a/runsc/cmd/list.go
+++ b/runsc/cmd/list.go
@@ -25,8 +25,8 @@ import (
"flag"
"github.com/google/subcommands"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
)
// List implements subcommands.Command for the "list" command for the "list" command.
diff --git a/runsc/cmd/pause.go b/runsc/cmd/pause.go
index 11b36aa10..9c0e92001 100644
--- a/runsc/cmd/pause.go
+++ b/runsc/cmd/pause.go
@@ -19,8 +19,8 @@ import (
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
)
// Pause implements subcommands.Command for the "pause" command.
diff --git a/runsc/cmd/ps.go b/runsc/cmd/ps.go
index 3a3e6f17a..45c644f3f 100644
--- a/runsc/cmd/ps.go
+++ b/runsc/cmd/ps.go
@@ -20,9 +20,9 @@ import (
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/pkg/sentry/control"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
+ "gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
)
// PS implements subcommands.Command for the "ps" command.
diff --git a/runsc/cmd/restore.go b/runsc/cmd/restore.go
index 3ab2f5676..7be60cd7d 100644
--- a/runsc/cmd/restore.go
+++ b/runsc/cmd/restore.go
@@ -21,9 +21,9 @@ import (
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/specutils"
)
// Restore implements subcommands.Command for the "restore" command.
@@ -80,25 +80,38 @@ func (r *Restore) Execute(_ context.Context, f *flag.FlagSet, args ...interface{
conf := args[0].(*boot.Config)
waitStatus := args[1].(*syscall.WaitStatus)
+ if conf.Rootless {
+ return Errorf("Rootless mode not supported with %q", r.Name())
+ }
+
bundleDir := r.bundleDir
if bundleDir == "" {
bundleDir = getwdOrDie()
}
spec, err := specutils.ReadSpec(bundleDir)
if err != nil {
- Fatalf("reading spec: %v", err)
+ return Errorf("reading spec: %v", err)
}
specutils.LogSpec(spec)
if r.imagePath == "" {
- Fatalf("image-path flag must be provided")
+ return Errorf("image-path flag must be provided")
}
conf.RestoreFile = filepath.Join(r.imagePath, checkpointFileName)
- ws, err := container.Run(id, spec, conf, bundleDir, r.consoleSocket, r.pidFile, r.userLog, r.detach)
+ runArgs := container.Args{
+ ID: id,
+ Spec: spec,
+ BundleDir: bundleDir,
+ ConsoleSocket: r.consoleSocket,
+ PIDFile: r.pidFile,
+ UserLog: r.userLog,
+ Attached: !r.detach,
+ }
+ ws, err := container.Run(conf, runArgs)
if err != nil {
- Fatalf("running container: %v", err)
+ return Errorf("running container: %v", err)
}
*waitStatus = ws
diff --git a/runsc/cmd/resume.go b/runsc/cmd/resume.go
index 9a2ade41e..b2df5c640 100644
--- a/runsc/cmd/resume.go
+++ b/runsc/cmd/resume.go
@@ -19,8 +19,8 @@ import (
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
)
// Resume implements subcommands.Command for the "resume" command.
diff --git a/runsc/cmd/run.go b/runsc/cmd/run.go
index c228b4f93..33f4bc12b 100644
--- a/runsc/cmd/run.go
+++ b/runsc/cmd/run.go
@@ -20,9 +20,9 @@ import (
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/specutils"
)
// Run implements subcommands.Command for the "run" command.
@@ -67,19 +67,32 @@ func (r *Run) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) s
conf := args[0].(*boot.Config)
waitStatus := args[1].(*syscall.WaitStatus)
+ if conf.Rootless {
+ return Errorf("Rootless mode not supported with %q", r.Name())
+ }
+
bundleDir := r.bundleDir
if bundleDir == "" {
bundleDir = getwdOrDie()
}
spec, err := specutils.ReadSpec(bundleDir)
if err != nil {
- Fatalf("reading spec: %v", err)
+ return Errorf("reading spec: %v", err)
}
specutils.LogSpec(spec)
- ws, err := container.Run(id, spec, conf, bundleDir, r.consoleSocket, r.pidFile, r.userLog, r.detach)
+ runArgs := container.Args{
+ ID: id,
+ Spec: spec,
+ BundleDir: bundleDir,
+ ConsoleSocket: r.consoleSocket,
+ PIDFile: r.pidFile,
+ UserLog: r.userLog,
+ Attached: !r.detach,
+ }
+ ws, err := container.Run(conf, runArgs)
if err != nil {
- Fatalf("running container: %v", err)
+ return Errorf("running container: %v", err)
}
*waitStatus = ws
diff --git a/runsc/cmd/start.go b/runsc/cmd/start.go
index 31e8f42bb..de2115dff 100644
--- a/runsc/cmd/start.go
+++ b/runsc/cmd/start.go
@@ -18,8 +18,8 @@ import (
"context"
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
)
// Start implements subcommands.Command for the "start" command.
diff --git a/runsc/cmd/state.go b/runsc/cmd/state.go
index f0d449b19..e9f41cbd8 100644
--- a/runsc/cmd/state.go
+++ b/runsc/cmd/state.go
@@ -21,9 +21,9 @@ import (
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
)
// State implements subcommands.Command for the "state" command.
diff --git a/runsc/cmd/syscalls.go b/runsc/cmd/syscalls.go
index 9c8a66490..fb6c1ab29 100644
--- a/runsc/cmd/syscalls.go
+++ b/runsc/cmd/syscalls.go
@@ -27,7 +27,7 @@ import (
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
)
// Syscalls implements subcommands.Command for the "syscalls" command.
@@ -41,7 +41,7 @@ type Syscalls struct {
// Maps operating system to architecture to ArchInfo.
type CompatibilityInfo map[string]map[string]ArchInfo
-// ArchInfo is compatbility doc for an architecture.
+// ArchInfo is compatibility doc for an architecture.
type ArchInfo struct {
// Syscalls maps syscall number for the architecture to the doc.
Syscalls map[uintptr]SyscallDoc `json:"syscalls"`
diff --git a/runsc/cmd/wait.go b/runsc/cmd/wait.go
index 58fd01974..046489687 100644
--- a/runsc/cmd/wait.go
+++ b/runsc/cmd/wait.go
@@ -22,8 +22,8 @@ import (
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/container"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/container"
)
const (
diff --git a/runsc/console/BUILD b/runsc/console/BUILD
index 3ff9eba27..e623c1a0f 100644
--- a/runsc/console/BUILD
+++ b/runsc/console/BUILD
@@ -4,8 +4,10 @@ package(licenses = ["notice"])
go_library(
name = "console",
- srcs = ["console.go"],
- importpath = "gvisor.googlesource.com/gvisor/runsc/console",
+ srcs = [
+ "console.go",
+ ],
+ importpath = "gvisor.dev/gvisor/runsc/console",
visibility = [
"//runsc:__subpackages__",
],
diff --git a/runsc/container/BUILD b/runsc/container/BUILD
index 13709a0ae..e246c38ae 100644
--- a/runsc/container/BUILD
+++ b/runsc/container/BUILD
@@ -9,7 +9,7 @@ go_library(
"hook.go",
"status.go",
],
- importpath = "gvisor.googlesource.com/gvisor/runsc/container",
+ importpath = "gvisor.dev/gvisor/runsc/container",
visibility = [
"//runsc:__subpackages__",
"//test:__subpackages__",
@@ -53,6 +53,7 @@ go_test(
"//pkg/unet",
"//pkg/urpc",
"//runsc/boot",
+ "//runsc/boot/platforms",
"//runsc/specutils",
"//runsc/test/testutil",
"@com_github_cenkalti_backoff//:go_default_library",
diff --git a/runsc/container/console_test.go b/runsc/container/console_test.go
index d016533e6..e9372989f 100644
--- a/runsc/container/console_test.go
+++ b/runsc/container/console_test.go
@@ -27,10 +27,10 @@ import (
"github.com/kr/pty"
"golang.org/x/sys/unix"
- "gvisor.googlesource.com/gvisor/pkg/sentry/control"
- "gvisor.googlesource.com/gvisor/pkg/unet"
- "gvisor.googlesource.com/gvisor/pkg/urpc"
- "gvisor.googlesource.com/gvisor/runsc/test/testutil"
+ "gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/pkg/unet"
+ "gvisor.dev/gvisor/pkg/urpc"
+ "gvisor.dev/gvisor/runsc/test/testutil"
)
// socketPath creates a path inside bundleDir and ensures that the returned
@@ -138,8 +138,13 @@ func TestConsoleSocket(t *testing.T) {
defer cleanup()
// Create the container and pass the socket name.
- id := testutil.UniqueContainerID()
- c, err := Create(id, spec, conf, bundleDir, sock, "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ ConsoleSocket: sock,
+ }
+ c, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -167,7 +172,12 @@ func TestJobControlSignalExec(t *testing.T) {
defer os.RemoveAll(bundleDir)
// Create and start the container.
- c, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ c, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -186,7 +196,7 @@ func TestJobControlSignalExec(t *testing.T) {
defer ptySlave.Close()
// Exec bash and attach a terminal.
- args := &control.ExecArgs{
+ execArgs := &control.ExecArgs{
Filename: "/bin/bash",
// Don't let bash execute from profile or rc files, otherwise
// our PID counts get messed up.
@@ -198,7 +208,7 @@ func TestJobControlSignalExec(t *testing.T) {
StdioIsPty: true,
}
- pid, err := c.Execute(args)
+ pid, err := c.Execute(execArgs)
if err != nil {
t.Fatalf("error executing: %v", err)
}
@@ -296,8 +306,13 @@ func TestJobControlSignalRootContainer(t *testing.T) {
defer cleanup()
// Create the container and pass the socket name.
- id := testutil.UniqueContainerID()
- c, err := Create(id, spec, conf, bundleDir, sock, "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ ConsoleSocket: sock,
+ }
+ c, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
diff --git a/runsc/container/container.go b/runsc/container/container.go
index 04b611b56..8320bb2ca 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -33,12 +33,12 @@ import (
"github.com/cenkalti/backoff"
"github.com/gofrs/flock"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/control"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/cgroup"
- "gvisor.googlesource.com/gvisor/runsc/sandbox"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/cgroup"
+ "gvisor.dev/gvisor/runsc/sandbox"
+ "gvisor.dev/gvisor/runsc/specutils"
)
const (
@@ -206,7 +206,7 @@ func findContainerRoot(rootDir, partialID string) (string, error) {
}
// Now see whether id could be an abbreviation of exactly 1 of the
- // container ids. If id is ambigious (it could match more than 1
+ // container ids. If id is ambiguous (it could match more than 1
// container), it is an error.
cRoot = ""
ids, err := List(rootDir)
@@ -242,16 +242,47 @@ func List(rootDir string) ([]string, error) {
return out, nil
}
-// Create creates the container in a new Sandbox process, unless the metadata
+// Args is used to configure a new container.
+type Args struct {
+ // ID is the container unique identifier.
+ ID string
+
+ // Spec is the OCI spec that describes the container.
+ Spec *specs.Spec
+
+ // BundleDir is the directory containing the container bundle.
+ BundleDir string
+
+ // ConsoleSocket is the path to a unix domain socket that will receive
+ // the console FD. It may be empty.
+ ConsoleSocket string
+
+ // PIDFile is the filename where the container's root process PID will be
+ // written to. It may be empty.
+ PIDFile string
+
+ // UserLog is the filename to send user-visible logs to. It may be empty.
+ //
+ // It only applies for the init container.
+ UserLog string
+
+ // Attached indicates that the sandbox lifecycle is attached with the caller.
+ // If the caller exits, the sandbox should exit too.
+ //
+ // It only applies for the init container.
+ Attached bool
+}
+
+// New creates the container in a new Sandbox process, unless the metadata
// indicates that an existing Sandbox should be used. The caller must call
// Destroy() on the container.
-func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, pidFile, userLog string) (*Container, error) {
- log.Debugf("Create container %q in root dir: %s", id, conf.RootDir)
- if err := validateID(id); err != nil {
+func New(conf *boot.Config, args Args) (*Container, error) {
+ log.Debugf("Create container %q in root dir: %s", args.ID, conf.RootDir)
+ if err := validateID(args.ID); err != nil {
return nil, err
}
- unlockRoot, err := maybeLockRootContainer(spec, conf.RootDir)
+ unlockRoot, err := maybeLockRootContainer(args.Spec, conf.RootDir)
if err != nil {
return nil, err
}
@@ -259,7 +290,7 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo
// Lock the container metadata file to prevent concurrent creations of
// containers with the same id.
- containerRoot := filepath.Join(conf.RootDir, id)
+ containerRoot := filepath.Join(conf.RootDir, args.ID)
unlock, err := lockContainerMetadata(containerRoot)
if err != nil {
return nil, err
@@ -269,16 +300,16 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo
// Check if the container already exists by looking for the metadata
// file.
if _, err := os.Stat(filepath.Join(containerRoot, metadataFilename)); err == nil {
- return nil, fmt.Errorf("container with id %q already exists", id)
+ return nil, fmt.Errorf("container with id %q already exists", args.ID)
} else if !os.IsNotExist(err) {
return nil, fmt.Errorf("looking for existing container in %q: %v", containerRoot, err)
}
c := &Container{
- ID: id,
- Spec: spec,
- ConsoleSocket: consoleSocket,
- BundleDir: bundleDir,
+ ID: args.ID,
+ Spec: args.Spec,
+ ConsoleSocket: args.ConsoleSocket,
+ BundleDir: args.BundleDir,
Root: containerRoot,
Status: Creating,
CreatedAt: time.Now(),
@@ -294,31 +325,47 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo
// started in an existing sandbox, we must do so. The metadata will
// indicate the ID of the sandbox, which is the same as the ID of the
// init container in the sandbox.
- if isRoot(spec) {
- log.Debugf("Creating new sandbox for container %q", id)
+ if isRoot(args.Spec) {
+ log.Debugf("Creating new sandbox for container %q", args.ID)
// Create and join cgroup before processes are created to ensure they are
- // part of the cgroup from the start (and all tneir children processes).
- cg, err := cgroup.New(spec)
+ // part of the cgroup from the start (and all their children processes).
+ cg, err := cgroup.New(args.Spec)
if err != nil {
return nil, err
}
if cg != nil {
// If there is cgroup config, install it before creating sandbox process.
- if err := cg.Install(spec.Linux.Resources); err != nil {
+ if err := cg.Install(args.Spec.Linux.Resources); err != nil {
return nil, fmt.Errorf("configuring cgroup: %v", err)
}
}
if err := runInCgroup(cg, func() error {
- ioFiles, specFile, err := c.createGoferProcess(spec, conf, bundleDir)
+ ioFiles, specFile, err := c.createGoferProcess(args.Spec, conf, args.BundleDir)
if err != nil {
return err
}
// Start a new sandbox for this container. Any errors after this point
// must destroy the container.
- c.Sandbox, err = sandbox.New(id, spec, conf, bundleDir, consoleSocket, userLog, ioFiles, specFile, cg)
- return err
+ sandArgs := &sandbox.Args{
+ ID: args.ID,
+ Spec: args.Spec,
+ BundleDir: args.BundleDir,
+ ConsoleSocket: args.ConsoleSocket,
+ UserLog: args.UserLog,
+ IOFiles: ioFiles,
+ MountsFile: specFile,
+ Cgroup: cg,
+ Attached: args.Attached,
+ }
+ sand, err := sandbox.New(conf, sandArgs)
+ if err != nil {
+ return err
+ }
+ c.Sandbox = sand
+ return nil
+
}); err != nil {
return nil, err
}
@@ -331,7 +378,7 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo
// * A container struct whose sandbox ID is equal to the above
// container/sandbox ID, but that has a different container
// ID. This is the child container.
- sbid, ok := specutils.SandboxID(spec)
+ sbid, ok := specutils.SandboxID(args.Spec)
if !ok {
return nil, fmt.Errorf("no sandbox ID found when creating container")
}
@@ -356,8 +403,8 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo
// Write the PID file. Containerd considers the create complete after
// this file is created, so it must be the last thing we do.
- if pidFile != "" {
- if err := ioutil.WriteFile(pidFile, []byte(strconv.Itoa(c.SandboxPid())), 0644); err != nil {
+ if args.PIDFile != "" {
+ if err := ioutil.WriteFile(args.PIDFile, []byte(strconv.Itoa(c.SandboxPid())), 0644); err != nil {
return nil, fmt.Errorf("error writing PID file: %v", err)
}
}
@@ -399,7 +446,7 @@ func (c *Container) Start(conf *boot.Config) error {
}
} else {
// Join cgroup to strt gofer process to ensure it's part of the cgroup from
- // the start (and all tneir children processes).
+ // the start (and all their children processes).
if err := runInCgroup(c.Sandbox.Cgroup, func() error {
// Create the gofer process.
ioFiles, mountsFile, err := c.createGoferProcess(c.Spec, conf, c.BundleDir)
@@ -461,13 +508,13 @@ func (c *Container) Restore(spec *specs.Spec, conf *boot.Config, restoreFile str
}
// Run is a helper that calls Create + Start + Wait.
-func Run(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, pidFile, userLog string, detach bool) (syscall.WaitStatus, error) {
- log.Debugf("Run container %q in root dir: %s", id, conf.RootDir)
- c, err := Create(id, spec, conf, bundleDir, consoleSocket, pidFile, userLog)
+func Run(conf *boot.Config, args Args) (syscall.WaitStatus, error) {
+ log.Debugf("Run container %q in root dir: %s", args.ID, conf.RootDir)
+ c, err := New(conf, args)
if err != nil {
return 0, fmt.Errorf("creating container: %v", err)
}
- // Clean up partially created container if an error ocurrs.
+ // Clean up partially created container if an error occurs.
// Any errors returned by Destroy() itself are ignored.
cu := specutils.MakeCleanup(func() {
c.Destroy()
@@ -476,7 +523,7 @@ func Run(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocke
if conf.RestoreFile != "" {
log.Debugf("Restore: %v", conf.RestoreFile)
- if err := c.Restore(spec, conf, conf.RestoreFile); err != nil {
+ if err := c.Restore(args.Spec, conf, conf.RestoreFile); err != nil {
return 0, fmt.Errorf("starting container: %v", err)
}
} else {
@@ -484,11 +531,11 @@ func Run(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocke
return 0, fmt.Errorf("starting container: %v", err)
}
}
- if detach {
- cu.Release()
- return 0, nil
+ if args.Attached {
+ return c.Wait()
}
- return c.Wait()
+ cu.Release()
+ return 0, nil
}
// Execute runs the specified command in the container. It returns the PID of
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
index 72c5ecbb0..c1d6ca7b8 100644
--- a/runsc/container/container_test.go
+++ b/runsc/container/container_test.go
@@ -17,6 +17,7 @@ package container
import (
"bytes"
"fmt"
+ "io"
"io/ioutil"
"os"
"path"
@@ -31,12 +32,14 @@ import (
"github.com/cenkalti/backoff"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/control"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/test/testutil"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/boot/platforms"
+ "gvisor.dev/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/runsc/test/testutil"
)
// waitForProcessList waits for the given process list to show up in the container.
@@ -210,7 +213,13 @@ func run(spec *specs.Spec, conf *boot.Config) error {
defer os.RemoveAll(bundleDir)
// Create, start and wait for the container.
- ws, err := Run(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "", false)
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ Attached: true,
+ }
+ ws, err := Run(conf, args)
if err != nil {
return fmt.Errorf("running container: %v", err)
}
@@ -249,7 +258,7 @@ func configs(opts ...configOption) []*boot.Config {
if testutil.RaceEnabled {
continue
}
- c.Platform = boot.PlatformKVM
+ c.Platform = platforms.KVM
case nonExclusiveFS:
c.FileAccess = boot.FileAccessShared
default:
@@ -294,15 +303,19 @@ func TestLifecycle(t *testing.T) {
},
}
// Create the container.
- id := testutil.UniqueContainerID()
- c, err := Create(id, spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ c, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
defer c.Destroy()
// Load the container from disk and check the status.
- c, err = Load(rootDir, id)
+ c, err = Load(rootDir, args.ID)
if err != nil {
t.Fatalf("error loading container: %v", err)
}
@@ -315,7 +328,7 @@ func TestLifecycle(t *testing.T) {
if err != nil {
t.Fatalf("error listing containers: %v", err)
}
- if got, want := ids, []string{id}; !reflect.DeepEqual(got, want) {
+ if got, want := ids, []string{args.ID}; !reflect.DeepEqual(got, want) {
t.Errorf("container list got %v, want %v", got, want)
}
@@ -325,7 +338,7 @@ func TestLifecycle(t *testing.T) {
}
// Load the container from disk and check the status.
- c, err = Load(rootDir, id)
+ c, err = Load(rootDir, args.ID)
if err != nil {
t.Fatalf("error loading container: %v", err)
}
@@ -366,7 +379,7 @@ func TestLifecycle(t *testing.T) {
wg.Wait()
// Load the container from disk and check the status.
- c, err = Load(rootDir, id)
+ c, err = Load(rootDir, args.ID)
if err != nil {
t.Fatalf("error loading container: %v", err)
}
@@ -389,7 +402,7 @@ func TestLifecycle(t *testing.T) {
}
// Loading the container by id should fail.
- if _, err = Load(rootDir, id); err == nil {
+ if _, err = Load(rootDir, args.ID); err == nil {
t.Errorf("expected loading destroyed container to fail, but it did not")
}
}
@@ -397,6 +410,46 @@ func TestLifecycle(t *testing.T) {
// Test the we can execute the application with different path formats.
func TestExePath(t *testing.T) {
+ // Create two directories that will be prepended to PATH.
+ firstPath, err := ioutil.TempDir(testutil.TmpDir(), "first")
+ if err != nil {
+ t.Fatal(err)
+ }
+ secondPath, err := ioutil.TempDir(testutil.TmpDir(), "second")
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ // Create two minimal executables in the second path, two of which
+ // will be masked by files in first path.
+ for _, p := range []string{"unmasked", "masked1", "masked2"} {
+ path := filepath.Join(secondPath, p)
+ f, err := os.OpenFile(path, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0777)
+ if err != nil {
+ t.Fatal(err)
+ }
+ defer f.Close()
+ if _, err := io.WriteString(f, "#!/bin/true\n"); err != nil {
+ t.Fatal(err)
+ }
+ }
+
+ // Create a non-executable file in the first path which masks a healthy
+ // executable in the second.
+ nonExecutable := filepath.Join(firstPath, "masked1")
+ f2, err := os.OpenFile(nonExecutable, os.O_CREATE|os.O_EXCL, 0666)
+ if err != nil {
+ t.Fatal(err)
+ }
+ f2.Close()
+
+ // Create a non-regular file in the first path which masks a healthy
+ // executable in the second.
+ nonRegular := filepath.Join(firstPath, "masked2")
+ if err := os.Mkdir(nonRegular, 0777); err != nil {
+ t.Fatal(err)
+ }
+
for _, conf := range configs(overlay) {
t.Logf("Running test with conf: %+v", conf)
for _, test := range []struct {
@@ -409,14 +462,36 @@ func TestExePath(t *testing.T) {
{path: "thisfiledoesntexit", success: false},
{path: "bin/thisfiledoesntexit", success: false},
{path: "/bin/thisfiledoesntexit", success: false},
+
+ {path: "unmasked", success: true},
+ {path: filepath.Join(firstPath, "unmasked"), success: false},
+ {path: filepath.Join(secondPath, "unmasked"), success: true},
+
+ {path: "masked1", success: true},
+ {path: filepath.Join(firstPath, "masked1"), success: false},
+ {path: filepath.Join(secondPath, "masked1"), success: true},
+
+ {path: "masked2", success: true},
+ {path: filepath.Join(firstPath, "masked2"), success: false},
+ {path: filepath.Join(secondPath, "masked2"), success: true},
} {
spec := testutil.NewSpecWithArgs(test.path)
+ spec.Process.Env = []string{
+ fmt.Sprintf("PATH=%s:%s:%s", firstPath, secondPath, os.Getenv("PATH")),
+ }
+
rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
if err != nil {
t.Fatalf("exec: %s, error setting up container: %v", test.path, err)
}
- ws, err := Run(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "", false)
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ Attached: true,
+ }
+ ws, err := Run(conf, args)
os.RemoveAll(rootDir)
os.RemoveAll(bundleDir)
@@ -449,7 +524,13 @@ func TestAppExitStatus(t *testing.T) {
defer os.RemoveAll(rootDir)
defer os.RemoveAll(bundleDir)
- ws, err := Run(testutil.UniqueContainerID(), succSpec, conf, bundleDir, "", "", "", false)
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: succSpec,
+ BundleDir: bundleDir,
+ Attached: true,
+ }
+ ws, err := Run(conf, args)
if err != nil {
t.Fatalf("error running container: %v", err)
}
@@ -468,7 +549,13 @@ func TestAppExitStatus(t *testing.T) {
defer os.RemoveAll(rootDir2)
defer os.RemoveAll(bundleDir2)
- ws, err = Run(testutil.UniqueContainerID(), errSpec, conf, bundleDir2, "", "", "", false)
+ args2 := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: errSpec,
+ BundleDir: bundleDir2,
+ Attached: true,
+ }
+ ws, err = Run(conf, args2)
if err != nil {
t.Fatalf("error running container: %v", err)
}
@@ -493,7 +580,12 @@ func TestExec(t *testing.T) {
defer os.RemoveAll(bundleDir)
// Create and start the container.
- cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ cont, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -525,7 +617,7 @@ func TestExec(t *testing.T) {
t.Error(err)
}
- args := &control.ExecArgs{
+ execArgs := &control.ExecArgs{
Filename: "/bin/sleep",
Argv: []string{"/bin/sleep", "5"},
WorkingDirectory: "/",
@@ -536,7 +628,7 @@ func TestExec(t *testing.T) {
// First, start running exec (whick blocks).
status := make(chan error, 1)
go func() {
- exitStatus, err := cont.executeSync(args)
+ exitStatus, err := cont.executeSync(execArgs)
if err != nil {
log.Debugf("error executing: %v", err)
status <- err
@@ -584,7 +676,12 @@ func TestKillPid(t *testing.T) {
defer os.RemoveAll(bundleDir)
// Create and start the container.
- cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ cont, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -655,7 +752,12 @@ func TestCheckpointRestore(t *testing.T) {
defer os.RemoveAll(bundleDir)
// Create and start the container.
- cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ cont, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -701,7 +803,12 @@ func TestCheckpointRestore(t *testing.T) {
defer outputFile2.Close()
// Restore into a new container.
- cont2, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args2 := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ cont2, err := New(conf, args2)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -740,7 +847,12 @@ func TestCheckpointRestore(t *testing.T) {
defer outputFile3.Close()
// Restore into a new container.
- cont3, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args3 := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ cont3, err := New(conf, args3)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -777,7 +889,7 @@ func TestUnixDomainSockets(t *testing.T) {
t.Logf("Running test with conf: %+v", conf)
// UDS path is limited to 108 chars for compatibility with older systems.
- // Use '/tmp' (instead of testutil.TmpDir) to to ensure the size limit is
+ // Use '/tmp' (instead of testutil.TmpDir) to ensure the size limit is
// not exceeded. Assumes '/tmp' exists in the system.
dir, err := ioutil.TempDir("/tmp", "uds-test")
if err != nil {
@@ -819,7 +931,12 @@ func TestUnixDomainSockets(t *testing.T) {
defer os.RemoveAll(bundleDir)
// Create and start the container.
- cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ cont, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -866,7 +983,12 @@ func TestUnixDomainSockets(t *testing.T) {
defer outputFile2.Close()
// Restore into a new container.
- contRestore, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ argsRestore := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ contRestore, err := New(conf, argsRestore)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -920,7 +1042,12 @@ func TestPauseResume(t *testing.T) {
defer os.RemoveAll(bundleDir)
// Create and start the container.
- cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ cont, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -948,7 +1075,7 @@ func TestPauseResume(t *testing.T) {
}
script := fmt.Sprintf("while [[ -f %q ]]; do sleep 0.1; done", lock.Name())
- args := &control.ExecArgs{
+ execArgs := &control.ExecArgs{
Filename: "/bin/bash",
Argv: []string{"bash", "-c", script},
WorkingDirectory: "/",
@@ -956,7 +1083,7 @@ func TestPauseResume(t *testing.T) {
}
// First, start running exec.
- _, err = cont.Execute(args)
+ _, err = cont.Execute(execArgs)
if err != nil {
t.Fatalf("error executing: %v", err)
}
@@ -1025,7 +1152,12 @@ func TestPauseResumeStatus(t *testing.T) {
defer os.RemoveAll(bundleDir)
// Create and start the container.
- cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ cont, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -1089,7 +1221,12 @@ func TestCapabilities(t *testing.T) {
defer os.RemoveAll(bundleDir)
// Create and start the container.
- cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ cont, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -1131,7 +1268,7 @@ func TestCapabilities(t *testing.T) {
// Need to traverse the intermediate directory.
os.Chmod(rootDir, 0755)
- args := &control.ExecArgs{
+ execArgs := &control.ExecArgs{
Filename: exePath,
Argv: []string{exePath},
WorkingDirectory: "/",
@@ -1141,16 +1278,16 @@ func TestCapabilities(t *testing.T) {
}
// "exe" should fail because we don't have the necessary permissions.
- if _, err := cont.executeSync(args); err == nil {
+ if _, err := cont.executeSync(execArgs); err == nil {
t.Fatalf("container executed without error, but an error was expected")
}
// Now we run with the capability enabled and should succeed.
- args.Capabilities = &auth.TaskCapabilities{
+ execArgs.Capabilities = &auth.TaskCapabilities{
EffectiveCaps: auth.CapabilitySetOf(linux.CAP_DAC_OVERRIDE),
}
// "exe" should not fail this time.
- if _, err := cont.executeSync(args); err != nil {
+ if _, err := cont.executeSync(execArgs); err != nil {
t.Fatalf("container failed to exec %v: %v", args, err)
}
}
@@ -1231,7 +1368,12 @@ func TestReadonlyRoot(t *testing.T) {
defer os.RemoveAll(bundleDir)
// Create, start and wait for the container.
- c, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ c, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -1299,7 +1441,12 @@ func TestUIDMap(t *testing.T) {
defer os.RemoveAll(bundleDir)
// Create, start and wait for the container.
- c, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ c, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -1351,7 +1498,12 @@ func TestReadonlyMount(t *testing.T) {
defer os.RemoveAll(bundleDir)
// Create, start and wait for the container.
- c, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ c, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -1395,7 +1547,12 @@ func TestAbbreviatedIDs(t *testing.T) {
defer os.RemoveAll(bundleDir)
// Create and start the container.
- cont, err := Create(cid, spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: cid,
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ cont, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -1440,7 +1597,12 @@ func TestGoferExits(t *testing.T) {
defer os.RemoveAll(bundleDir)
// Create and start the container.
- c, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ c, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -1519,7 +1681,14 @@ func TestUserLog(t *testing.T) {
userLog := filepath.Join(dir, "user.log")
// Create, start and wait for the container.
- ws, err := Run(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", userLog, false)
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ UserLog: userLog,
+ Attached: true,
+ }
+ ws, err := Run(conf, args)
if err != nil {
t.Fatalf("error running container: %v", err)
}
@@ -1553,7 +1722,12 @@ func TestWaitOnExitedSandbox(t *testing.T) {
defer os.RemoveAll(bundleDir)
// Create and Start the container.
- c, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ c, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -1596,7 +1770,12 @@ func TestDestroyNotStarted(t *testing.T) {
defer os.RemoveAll(bundleDir)
// Create the container and check that it can be destroyed.
- c, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ c, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -1618,15 +1797,19 @@ func TestDestroyStarting(t *testing.T) {
defer os.RemoveAll(bundleDir)
// Create the container and check that it can be destroyed.
- id := testutil.UniqueContainerID()
- c, err := Create(id, spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ c, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
// Container is not thread safe, so load another instance to run in
// concurrently.
- startCont, err := Load(rootDir, id)
+ startCont, err := Load(rootDir, args.ID)
if err != nil {
t.Fatalf("error loading container: %v", err)
}
@@ -1731,7 +1914,12 @@ func TestMountPropagation(t *testing.T) {
defer os.RemoveAll(rootDir)
defer os.RemoveAll(bundleDir)
- cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ cont, err := New(conf, args)
if err != nil {
t.Fatalf("creating container: %v", err)
}
@@ -1749,21 +1937,21 @@ func TestMountPropagation(t *testing.T) {
// Check that mount didn't propagate to private mount.
privFile := filepath.Join(priv, "mnt", "file")
- args := &control.ExecArgs{
+ execArgs := &control.ExecArgs{
Filename: "/usr/bin/test",
Argv: []string{"test", "!", "-f", privFile},
}
- if ws, err := cont.executeSync(args); err != nil || ws != 0 {
+ if ws, err := cont.executeSync(execArgs); err != nil || ws != 0 {
t.Fatalf("exec: test ! -f %q, ws: %v, err: %v", privFile, ws, err)
}
// Check that mount propagated to slave mount.
slaveFile := filepath.Join(slave, "mnt", "file")
- args = &control.ExecArgs{
+ execArgs = &control.ExecArgs{
Filename: "/usr/bin/test",
Argv: []string{"test", "-f", slaveFile},
}
- if ws, err := cont.executeSync(args); err != nil || ws != 0 {
+ if ws, err := cont.executeSync(execArgs); err != nil || ws != 0 {
t.Fatalf("exec: test -f %q, ws: %v, err: %v", privFile, ws, err)
}
}
@@ -1812,7 +2000,12 @@ func TestMountSymlink(t *testing.T) {
defer os.RemoveAll(rootDir)
defer os.RemoveAll(bundleDir)
- cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ cont, err := New(conf, args)
if err != nil {
t.Fatalf("creating container: %v", err)
}
@@ -1825,11 +2018,11 @@ func TestMountSymlink(t *testing.T) {
// Check that symlink was resolved and mount was created where the symlink
// is pointing to.
file := path.Join(target, "file")
- args := &control.ExecArgs{
+ execArgs := &control.ExecArgs{
Filename: "/usr/bin/test",
Argv: []string{"test", "-f", file},
}
- if ws, err := cont.executeSync(args); err != nil || ws != 0 {
+ if ws, err := cont.executeSync(execArgs); err != nil || ws != 0 {
t.Fatalf("exec: test -f %q, ws: %v, err: %v", file, ws, err)
}
}
@@ -1853,7 +2046,7 @@ func TestMain(m *testing.M) {
if err := testutil.ConfigureExePath(); err != nil {
panic(err.Error())
}
- testutil.RunAsRoot()
+ specutils.MaybeRunAsRoot()
os.Exit(m.Run())
}
diff --git a/runsc/container/hook.go b/runsc/container/hook.go
index acae6781e..901607aee 100644
--- a/runsc/container/hook.go
+++ b/runsc/container/hook.go
@@ -24,7 +24,7 @@ import (
"time"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/log"
)
// This file implements hooks as defined in OCI spec:
diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go
index 4ea3c74ac..c0f9b372c 100644
--- a/runsc/container/multi_container_test.go
+++ b/runsc/container/multi_container_test.go
@@ -28,10 +28,10 @@ import (
"time"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/sentry/control"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
- "gvisor.googlesource.com/gvisor/runsc/test/testutil"
+ "gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/runsc/test/testutil"
)
func createSpecs(cmds ...[]string) ([]*specs.Spec, []string) {
@@ -84,7 +84,12 @@ func startContainers(conf *boot.Config, specs []*specs.Spec, ids []string) ([]*C
}
bundles = append(bundles, bundleDir)
- cont, err := Create(ids[i], spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: ids[i],
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ cont, err := New(conf, args)
if err != nil {
cleanup()
return nil, nil, fmt.Errorf("error creating container: %v", err)
@@ -99,6 +104,36 @@ func startContainers(conf *boot.Config, specs []*specs.Spec, ids []string) ([]*C
return containers, cleanup, nil
}
+type execDesc struct {
+ c *Container
+ cmd []string
+ want int
+ desc string
+}
+
+func execMany(execs []execDesc) error {
+ for _, exec := range execs {
+ args := &control.ExecArgs{Argv: exec.cmd}
+ if ws, err := exec.c.executeSync(args); err != nil {
+ return fmt.Errorf("error executing %+v: %v", args, err)
+ } else if ws.ExitStatus() != exec.want {
+ return fmt.Errorf("%q: exec %q got exit status: %d, want: %d", exec.desc, exec.cmd, ws.ExitStatus(), exec.want)
+ }
+ }
+ return nil
+}
+
+func createSharedMount(mount specs.Mount, name string, pod ...*specs.Spec) {
+ for _, spec := range pod {
+ spec.Annotations[path.Join(boot.MountPrefix, name, "source")] = mount.Source
+ spec.Annotations[path.Join(boot.MountPrefix, name, "type")] = mount.Type
+ spec.Annotations[path.Join(boot.MountPrefix, name, "share")] = "pod"
+ if len(mount.Options) > 0 {
+ spec.Annotations[path.Join(boot.MountPrefix, name, "options")] = strings.Join(mount.Options, ",")
+ }
+ }
+}
+
// TestMultiContainerSanity checks that it is possible to run 2 dead-simple
// containers in the same sandbox.
func TestMultiContainerSanity(t *testing.T) {
@@ -631,7 +666,12 @@ func TestMultiContainerDestroyNotStarted(t *testing.T) {
}
defer os.RemoveAll(rootBundleDir)
- root, err := Create(ids[0], specs[0], conf, rootBundleDir, "", "", "")
+ rootArgs := Args{
+ ID: ids[0],
+ Spec: specs[0],
+ BundleDir: rootBundleDir,
+ }
+ root, err := New(conf, rootArgs)
if err != nil {
t.Fatalf("error creating root container: %v", err)
}
@@ -647,7 +687,12 @@ func TestMultiContainerDestroyNotStarted(t *testing.T) {
}
defer os.RemoveAll(bundleDir)
- cont, err := Create(ids[1], specs[1], conf, bundleDir, "", "", "")
+ args := Args{
+ ID: ids[1],
+ Spec: specs[1],
+ BundleDir: bundleDir,
+ }
+ cont, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -682,7 +727,12 @@ func TestMultiContainerDestroyStarting(t *testing.T) {
}
defer os.RemoveAll(rootBundleDir)
- root, err := Create(ids[0], specs[0], conf, rootBundleDir, "", "", "")
+ rootArgs := Args{
+ ID: ids[0],
+ Spec: specs[0],
+ BundleDir: rootBundleDir,
+ }
+ root, err := New(conf, rootArgs)
if err != nil {
t.Fatalf("error creating root container: %v", err)
}
@@ -703,7 +753,12 @@ func TestMultiContainerDestroyStarting(t *testing.T) {
}
defer os.RemoveAll(bundleDir)
- cont, err := Create(ids[i], specs[i], conf, bundleDir, "", "", "")
+ rootArgs := Args{
+ ID: ids[i],
+ Spec: specs[i],
+ BundleDir: rootBundleDir,
+ }
+ cont, err := New(conf, rootArgs)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -777,7 +832,12 @@ func TestMultiContainerGoferStop(t *testing.T) {
// Start root container.
conf := testutil.TestConfigWithRoot(rootDir)
- root, err := Create(rootID, rootSpec, conf, bundleDir, "", "", "")
+ rootArgs := Args{
+ ID: rootID,
+ Spec: rootSpec,
+ BundleDir: bundleDir,
+ }
+ root, err := New(conf, rootArgs)
if err != nil {
t.Fatalf("error creating root container: %v", err)
}
@@ -801,7 +861,12 @@ func TestMultiContainerGoferStop(t *testing.T) {
}
defer os.RemoveAll(bundleDir)
- child, err := Create(ids[j], spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: ids[j],
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ child, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -828,3 +893,277 @@ func TestMultiContainerGoferStop(t *testing.T) {
}
}
}
+
+// Test that pod shared mounts are properly mounted in 2 containers and that
+// changes from one container is reflected in the other.
+func TestMultiContainerSharedMount(t *testing.T) {
+ for _, conf := range configs(all...) {
+ t.Logf("Running test with conf: %+v", conf)
+
+ // Setup the containers.
+ sleep := []string{"sleep", "100"}
+ podSpec, ids := createSpecs(sleep, sleep)
+ mnt0 := specs.Mount{
+ Destination: "/mydir/test",
+ Source: "/some/dir",
+ Type: "tmpfs",
+ Options: nil,
+ }
+ podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0)
+
+ mnt1 := mnt0
+ mnt1.Destination = "/mydir2/test2"
+ podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1)
+
+ createSharedMount(mnt0, "test-mount", podSpec...)
+
+ containers, cleanup, err := startContainers(conf, podSpec, ids)
+ if err != nil {
+ t.Fatalf("error starting containers: %v", err)
+ }
+ defer cleanup()
+
+ file0 := path.Join(mnt0.Destination, "abc")
+ file1 := path.Join(mnt1.Destination, "abc")
+ execs := []execDesc{
+ {
+ c: containers[0],
+ cmd: []string{"/usr/bin/test", "-d", mnt0.Destination},
+ desc: "directory is mounted in container0",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/usr/bin/test", "-d", mnt1.Destination},
+ desc: "directory is mounted in container1",
+ },
+ {
+ c: containers[0],
+ cmd: []string{"/usr/bin/touch", file0},
+ desc: "create file in container0",
+ },
+ {
+ c: containers[0],
+ cmd: []string{"/usr/bin/test", "-f", file0},
+ desc: "file appears in container0",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/usr/bin/test", "-f", file1},
+ desc: "file appears in container1",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/bin/rm", file1},
+ desc: "file removed from container1",
+ },
+ {
+ c: containers[0],
+ cmd: []string{"/usr/bin/test", "!", "-f", file0},
+ desc: "file removed from container0",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/usr/bin/test", "!", "-f", file1},
+ desc: "file removed from container1",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/bin/mkdir", file1},
+ desc: "create directory in container1",
+ },
+ {
+ c: containers[0],
+ cmd: []string{"/usr/bin/test", "-d", file0},
+ desc: "dir appears in container0",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/usr/bin/test", "-d", file1},
+ desc: "dir appears in container1",
+ },
+ {
+ c: containers[0],
+ cmd: []string{"/bin/rmdir", file0},
+ desc: "create directory in container0",
+ },
+ {
+ c: containers[0],
+ cmd: []string{"/usr/bin/test", "!", "-d", file0},
+ desc: "dir removed from container0",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/usr/bin/test", "!", "-d", file1},
+ desc: "dir removed from container1",
+ },
+ }
+ if err := execMany(execs); err != nil {
+ t.Fatal(err.Error())
+ }
+ }
+}
+
+// Test that pod mounts are mounted as readonly when requested.
+func TestMultiContainerSharedMountReadonly(t *testing.T) {
+ for _, conf := range configs(all...) {
+ t.Logf("Running test with conf: %+v", conf)
+
+ // Setup the containers.
+ sleep := []string{"sleep", "100"}
+ podSpec, ids := createSpecs(sleep, sleep)
+ mnt0 := specs.Mount{
+ Destination: "/mydir/test",
+ Source: "/some/dir",
+ Type: "tmpfs",
+ Options: []string{"ro"},
+ }
+ podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0)
+
+ mnt1 := mnt0
+ mnt1.Destination = "/mydir2/test2"
+ podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1)
+
+ createSharedMount(mnt0, "test-mount", podSpec...)
+
+ containers, cleanup, err := startContainers(conf, podSpec, ids)
+ if err != nil {
+ t.Fatalf("error starting containers: %v", err)
+ }
+ defer cleanup()
+
+ file0 := path.Join(mnt0.Destination, "abc")
+ file1 := path.Join(mnt1.Destination, "abc")
+ execs := []execDesc{
+ {
+ c: containers[0],
+ cmd: []string{"/usr/bin/test", "-d", mnt0.Destination},
+ desc: "directory is mounted in container0",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/usr/bin/test", "-d", mnt1.Destination},
+ desc: "directory is mounted in container1",
+ },
+ {
+ c: containers[0],
+ cmd: []string{"/usr/bin/touch", file0},
+ want: 1,
+ desc: "fails to write to container0",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/usr/bin/touch", file1},
+ want: 1,
+ desc: "fails to write to container1",
+ },
+ }
+ if err := execMany(execs); err != nil {
+ t.Fatal(err.Error())
+ }
+ }
+}
+
+// Test that shared pod mounts continue to work after container is restarted.
+func TestMultiContainerSharedMountRestart(t *testing.T) {
+ for _, conf := range configs(all...) {
+ t.Logf("Running test with conf: %+v", conf)
+
+ // Setup the containers.
+ sleep := []string{"sleep", "100"}
+ podSpec, ids := createSpecs(sleep, sleep)
+ mnt0 := specs.Mount{
+ Destination: "/mydir/test",
+ Source: "/some/dir",
+ Type: "tmpfs",
+ Options: nil,
+ }
+ podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0)
+
+ mnt1 := mnt0
+ mnt1.Destination = "/mydir2/test2"
+ podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1)
+
+ createSharedMount(mnt0, "test-mount", podSpec...)
+
+ containers, cleanup, err := startContainers(conf, podSpec, ids)
+ if err != nil {
+ t.Fatalf("error starting containers: %v", err)
+ }
+ defer cleanup()
+
+ file0 := path.Join(mnt0.Destination, "abc")
+ file1 := path.Join(mnt1.Destination, "abc")
+ execs := []execDesc{
+ {
+ c: containers[0],
+ cmd: []string{"/usr/bin/touch", file0},
+ desc: "create file in container0",
+ },
+ {
+ c: containers[0],
+ cmd: []string{"/usr/bin/test", "-f", file0},
+ desc: "file appears in container0",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/usr/bin/test", "-f", file1},
+ desc: "file appears in container1",
+ },
+ }
+ if err := execMany(execs); err != nil {
+ t.Fatal(err.Error())
+ }
+
+ containers[1].Destroy()
+
+ bundleDir, err := testutil.SetupBundleDir(podSpec[1])
+ if err != nil {
+ t.Fatalf("error restarting container: %v", err)
+ }
+ defer os.RemoveAll(bundleDir)
+
+ args := Args{
+ ID: ids[1],
+ Spec: podSpec[1],
+ BundleDir: bundleDir,
+ }
+ containers[1], err = New(conf, args)
+ if err != nil {
+ t.Fatalf("error creating container: %v", err)
+ }
+ if err := containers[1].Start(conf); err != nil {
+ t.Fatalf("error starting container: %v", err)
+ }
+
+ execs = []execDesc{
+ {
+ c: containers[0],
+ cmd: []string{"/usr/bin/test", "-f", file0},
+ desc: "file is still in container0",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/usr/bin/test", "-f", file1},
+ desc: "file is still in container1",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/bin/rm", file1},
+ desc: "file removed from container1",
+ },
+ {
+ c: containers[0],
+ cmd: []string{"/usr/bin/test", "!", "-f", file0},
+ desc: "file removed from container0",
+ },
+ {
+ c: containers[1],
+ cmd: []string{"/usr/bin/test", "!", "-f", file1},
+ desc: "file removed from container1",
+ },
+ }
+ if err := execMany(execs); err != nil {
+ t.Fatal(err.Error())
+ }
+ }
+}
diff --git a/runsc/container/shared_volume_test.go b/runsc/container/shared_volume_test.go
index 9d5a592a5..1f90d2462 100644
--- a/runsc/container/shared_volume_test.go
+++ b/runsc/container/shared_volume_test.go
@@ -22,10 +22,10 @@ import (
"path/filepath"
"testing"
- "gvisor.googlesource.com/gvisor/pkg/sentry/control"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/test/testutil"
+ "gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/test/testutil"
)
// TestSharedVolume checks that modifications to a volume mount are propagated
@@ -52,7 +52,12 @@ func TestSharedVolume(t *testing.T) {
defer os.RemoveAll(bundleDir)
// Create and start the container.
- c, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ c, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
@@ -206,7 +211,12 @@ func TestSharedVolumeFile(t *testing.T) {
defer os.RemoveAll(bundleDir)
// Create and start the container.
- c, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
+ args := Args{
+ ID: testutil.UniqueContainerID(),
+ Spec: spec,
+ BundleDir: bundleDir,
+ }
+ c, err := New(conf, args)
if err != nil {
t.Fatalf("error creating container: %v", err)
}
diff --git a/runsc/container/test_app/test_app.go b/runsc/container/test_app/test_app.go
index 62923f1ef..b7fc6498f 100644
--- a/runsc/container/test_app/test_app.go
+++ b/runsc/container/test_app/test_app.go
@@ -29,7 +29,7 @@ import (
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/runsc/test/testutil"
+ "gvisor.dev/gvisor/runsc/test/testutil"
)
func main() {
diff --git a/runsc/fsgofer/BUILD b/runsc/fsgofer/BUILD
index 4adc9c1bc..80a4aa2fe 100644
--- a/runsc/fsgofer/BUILD
+++ b/runsc/fsgofer/BUILD
@@ -8,7 +8,7 @@ go_library(
"fsgofer.go",
"fsgofer_unsafe.go",
],
- importpath = "gvisor.googlesource.com/gvisor/runsc/fsgofer",
+ importpath = "gvisor.dev/gvisor/runsc/fsgofer",
visibility = [
"//runsc:__subpackages__",
],
diff --git a/runsc/fsgofer/filter/BUILD b/runsc/fsgofer/filter/BUILD
index 78c5b526c..e2318a978 100644
--- a/runsc/fsgofer/filter/BUILD
+++ b/runsc/fsgofer/filter/BUILD
@@ -11,7 +11,7 @@ go_library(
"extra_filters_race.go",
"filter.go",
],
- importpath = "gvisor.googlesource.com/gvisor/runsc/fsgofer/filter",
+ importpath = "gvisor.dev/gvisor/runsc/fsgofer/filter",
visibility = [
"//runsc:__subpackages__",
],
diff --git a/runsc/fsgofer/filter/config.go b/runsc/fsgofer/filter/config.go
index 4faab2946..2d50774d4 100644
--- a/runsc/fsgofer/filter/config.go
+++ b/runsc/fsgofer/filter/config.go
@@ -19,8 +19,8 @@ import (
"syscall"
"golang.org/x/sys/unix"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/seccomp"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/seccomp"
)
// allowedSyscalls is the set of syscalls executed by the gofer.
diff --git a/runsc/fsgofer/filter/extra_filters.go b/runsc/fsgofer/filter/extra_filters.go
index 5c5ec4e06..e28d4b8d6 100644
--- a/runsc/fsgofer/filter/extra_filters.go
+++ b/runsc/fsgofer/filter/extra_filters.go
@@ -17,11 +17,11 @@
package filter
import (
- "gvisor.googlesource.com/gvisor/pkg/seccomp"
+ "gvisor.dev/gvisor/pkg/seccomp"
)
// instrumentationFilters returns additional filters for syscalls used by
-// Go intrumentation tools, e.g. -race, -msan.
+// Go instrumentation tools, e.g. -race, -msan.
// Returns empty when disabled.
func instrumentationFilters() seccomp.SyscallRules {
return nil
diff --git a/runsc/fsgofer/filter/extra_filters_msan.go b/runsc/fsgofer/filter/extra_filters_msan.go
index 553060bc3..8c6179c8f 100644
--- a/runsc/fsgofer/filter/extra_filters_msan.go
+++ b/runsc/fsgofer/filter/extra_filters_msan.go
@@ -19,8 +19,8 @@ package filter
import (
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/seccomp"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/seccomp"
)
// instrumentationFilters returns additional filters for syscalls used by MSAN.
diff --git a/runsc/fsgofer/filter/extra_filters_race.go b/runsc/fsgofer/filter/extra_filters_race.go
index 28555f898..885c92f7a 100644
--- a/runsc/fsgofer/filter/extra_filters_race.go
+++ b/runsc/fsgofer/filter/extra_filters_race.go
@@ -19,8 +19,8 @@ package filter
import (
"syscall"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/seccomp"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/seccomp"
)
// instrumentationFilters returns additional filters for syscalls used by TSAN.
diff --git a/runsc/fsgofer/filter/filter.go b/runsc/fsgofer/filter/filter.go
index ff8154369..65053415f 100644
--- a/runsc/fsgofer/filter/filter.go
+++ b/runsc/fsgofer/filter/filter.go
@@ -18,7 +18,7 @@
package filter
import (
- "gvisor.googlesource.com/gvisor/pkg/seccomp"
+ "gvisor.dev/gvisor/pkg/seccomp"
)
// Install installs seccomp filters.
diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go
index 2cf50290a..fe450c64f 100644
--- a/runsc/fsgofer/fsgofer.go
+++ b/runsc/fsgofer/fsgofer.go
@@ -28,15 +28,16 @@ import (
"path"
"path/filepath"
"runtime"
+ "strconv"
"sync"
"syscall"
"golang.org/x/sys/unix"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/fd"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/p9"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/fd"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/p9"
+ "gvisor.dev/gvisor/runsc/specutils"
)
const (
@@ -223,6 +224,28 @@ type localFile struct {
lastDirentOffset uint64
}
+var procSelfFD *fd.FD
+
+// OpenProcSelfFD opens the /proc/self/fd directory, which will be used to
+// reopen file descriptors.
+func OpenProcSelfFD() error {
+ d, err := syscall.Open("/proc/self/fd", syscall.O_RDONLY|syscall.O_DIRECTORY, 0)
+ if err != nil {
+ return fmt.Errorf("error opening /proc/self/fd: %v", err)
+ }
+ procSelfFD = fd.New(d)
+ return nil
+}
+
+func reopenProcFd(f *fd.FD, mode int) (*fd.FD, error) {
+ d, err := syscall.Openat(int(procSelfFD.FD()), strconv.Itoa(f.FD()), mode&^syscall.O_NOFOLLOW, 0)
+ if err != nil {
+ return nil, err
+ }
+
+ return fd.New(d), nil
+}
+
func openAnyFileFromParent(parent *localFile, name string) (*fd.FD, string, error) {
path := path.Join(parent.hostPath, name)
f, err := openAnyFile(path, func(mode int) (*fd.FD, error) {
@@ -348,7 +371,7 @@ func (l *localFile) Open(mode p9.OpenFlags) (*fd.FD, p9.QID, uint32, error) {
// name_to_handle_at and open_by_handle_at aren't supported by overlay2.
log.Debugf("Open reopening file, mode: %v, %q", mode, l.hostPath)
var err error
- newFile, err = fd.Open(l.hostPath, openFlags|mode.OSFlags(), 0)
+ newFile, err = reopenProcFd(l.file, openFlags|mode.OSFlags())
if err != nil {
return nil, p9.QID{}, 0, extractErrno(err)
}
@@ -477,7 +500,7 @@ func (l *localFile) Walk(names []string) ([]p9.QID, p9.File, error) {
// Duplicate current file if 'names' is empty.
if len(names) == 0 {
newFile, err := openAnyFile(l.hostPath, func(mode int) (*fd.FD, error) {
- return fd.Open(l.hostPath, openFlags|mode, 0)
+ return reopenProcFd(l.file, openFlags|mode)
})
if err != nil {
return nil, nil, extractErrno(err)
@@ -596,7 +619,7 @@ func (l *localFile) GetAttr(_ p9.AttrMask) (p9.QID, p9.AttrMask, p9.Attr, error)
}
// SetAttr implements p9.File. Due to mismatch in file API, options
-// cannot be changed atomicaly and user may see partial changes when
+// cannot be changed atomically and user may see partial changes when
// an error happens.
func (l *localFile) SetAttr(valid p9.SetAttrMask, attr p9.SetAttr) error {
conf := l.attachPoint.conf
@@ -635,7 +658,7 @@ func (l *localFile) SetAttr(valid p9.SetAttrMask, attr p9.SetAttr) error {
f := l.file
if l.ft == regular && l.mode != p9.WriteOnly && l.mode != p9.ReadWrite {
var err error
- f, err = fd.Open(l.hostPath, openFlags|syscall.O_WRONLY, 0)
+ f, err = reopenProcFd(l.file, openFlags|os.O_WRONLY)
if err != nil {
return extractErrno(err)
}
diff --git a/runsc/fsgofer/fsgofer_test.go b/runsc/fsgofer/fsgofer_test.go
index 695836927..0a162bb8a 100644
--- a/runsc/fsgofer/fsgofer_test.go
+++ b/runsc/fsgofer/fsgofer_test.go
@@ -22,8 +22,8 @@ import (
"syscall"
"testing"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/p9"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/p9"
)
func init() {
@@ -31,6 +31,10 @@ func init() {
allConfs = append(allConfs, rwConfs...)
allConfs = append(allConfs, roConfs...)
+
+ if err := OpenProcSelfFD(); err != nil {
+ panic(err)
+ }
}
func assertPanic(t *testing.T, f func()) {
diff --git a/runsc/fsgofer/fsgofer_unsafe.go b/runsc/fsgofer/fsgofer_unsafe.go
index 58af5e44d..ff2556aee 100644
--- a/runsc/fsgofer/fsgofer_unsafe.go
+++ b/runsc/fsgofer/fsgofer_unsafe.go
@@ -18,8 +18,8 @@ import (
"syscall"
"unsafe"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/syserr"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/syserr"
)
func statAt(dirFd int, name string) (syscall.Stat_t, error) {
diff --git a/runsc/main.go b/runsc/main.go
index a214f6ba0..bc83c57a2 100644
--- a/runsc/main.go
+++ b/runsc/main.go
@@ -29,10 +29,11 @@ import (
"flag"
"github.com/google/subcommands"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/cmd"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/cmd"
+ "gvisor.dev/gvisor/runsc/specutils"
)
var (
@@ -61,16 +62,19 @@ var (
straceLogSize = flag.Uint("strace-log-size", 1024, "default size (in bytes) to log data argument blobs")
// Flags that control sandbox runtime behavior.
- platform = flag.String("platform", "ptrace", "specifies which platform to use: ptrace (default), kvm")
- network = flag.String("network", "sandbox", "specifies which network to use: sandbox (default), host, none. Using network inside the sandbox is more secure because it's isolated from the host network.")
- gso = flag.Bool("gso", true, "enable generic segmenation offload")
- fileAccess = flag.String("file-access", "exclusive", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.")
- overlay = flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.")
- watchdogAction = flag.String("watchdog-action", "log", "sets what action the watchdog takes when triggered: log (default), panic.")
- panicSignal = flag.Int("panic-signal", -1, "register signal handling that panics. Usually set to SIGUSR2(12) to troubleshoot hangs. -1 disables it.")
- profile = flag.Bool("profile", false, "prepares the sandbox to use Golang profiler. Note that enabling profiler loosens the seccomp protection added to the sandbox (DO NOT USE IN PRODUCTION).")
- netRaw = flag.Bool("net-raw", false, "enable raw sockets. When false, raw sockets are disabled by removing CAP_NET_RAW from containers (`runsc exec` will still be able to utilize raw sockets). Raw sockets allow malicious containers to craft packets and potentially attack the network.")
- numNetworkChannels = flag.Int("num-network-channels", 1, "number of underlying channels(FDs) to use for network link endpoints.")
+ platformName = flag.String("platform", "ptrace", "specifies which platform to use: ptrace (default), kvm")
+ network = flag.String("network", "sandbox", "specifies which network to use: sandbox (default), host, none. Using network inside the sandbox is more secure because it's isolated from the host network.")
+ gso = flag.Bool("gso", true, "enable generic segmenation offload")
+ fileAccess = flag.String("file-access", "exclusive", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.")
+ overlay = flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.")
+ watchdogAction = flag.String("watchdog-action", "log", "sets what action the watchdog takes when triggered: log (default), panic.")
+ panicSignal = flag.Int("panic-signal", -1, "register signal handling that panics. Usually set to SIGUSR2(12) to troubleshoot hangs. -1 disables it.")
+ profile = flag.Bool("profile", false, "prepares the sandbox to use Golang profiler. Note that enabling profiler loosens the seccomp protection added to the sandbox (DO NOT USE IN PRODUCTION).")
+ netRaw = flag.Bool("net-raw", false, "enable raw sockets. When false, raw sockets are disabled by removing CAP_NET_RAW from containers (`runsc exec` will still be able to utilize raw sockets). Raw sockets allow malicious containers to craft packets and potentially attack the network.")
+ numNetworkChannels = flag.Int("num-network-channels", 1, "number of underlying channels(FDs) to use for network link endpoints.")
+ rootless = flag.Bool("rootless", false, "it allows the sandbox to be started with a user that is not root. Sandbox and Gofer processes may run with same privileges as current user.")
+
+ // Test flags, not to be used outside tests, ever.
testOnlyAllowRunAsCurrentUserWithoutChroot = flag.Bool("TESTONLY-unsafe-nonroot", false, "TEST ONLY; do not ever use! This skips many security measures that isolate the host from the sandbox.")
)
@@ -136,8 +140,8 @@ func main() {
}
cmd.ErrorLogger = errorLogger
- platformType, err := boot.MakePlatformType(*platform)
- if err != nil {
+ platformType := *platformName
+ if _, err := platform.Lookup(platformType); err != nil {
cmd.Fatalf("%v", err)
}
@@ -166,26 +170,28 @@ func main() {
// Create a new Config from the flags.
conf := &boot.Config{
- RootDir: *rootDir,
- Debug: *debug,
- LogFilename: *logFilename,
- LogFormat: *logFormat,
- DebugLog: *debugLog,
- DebugLogFormat: *debugLogFormat,
- FileAccess: fsAccess,
- Overlay: *overlay,
- Network: netType,
- GSO: *gso,
- LogPackets: *logPackets,
- Platform: platformType,
- Strace: *strace,
- StraceLogSize: *straceLogSize,
- WatchdogAction: wa,
- PanicSignal: *panicSignal,
- ProfileEnable: *profile,
- EnableRaw: *netRaw,
+ RootDir: *rootDir,
+ Debug: *debug,
+ LogFilename: *logFilename,
+ LogFormat: *logFormat,
+ DebugLog: *debugLog,
+ DebugLogFormat: *debugLogFormat,
+ FileAccess: fsAccess,
+ Overlay: *overlay,
+ Network: netType,
+ GSO: *gso,
+ LogPackets: *logPackets,
+ Platform: platformType,
+ Strace: *strace,
+ StraceLogSize: *straceLogSize,
+ WatchdogAction: wa,
+ PanicSignal: *panicSignal,
+ ProfileEnable: *profile,
+ EnableRaw: *netRaw,
+ NumNetworkChannels: *numNetworkChannels,
+ Rootless: *rootless,
+
TestOnlyAllowRunAsCurrentUserWithoutChroot: *testOnlyAllowRunAsCurrentUserWithoutChroot,
- NumNetworkChannels: *numNetworkChannels,
}
if len(*straceSyscalls) != 0 {
conf.StraceSyscalls = strings.Split(*straceSyscalls, ",")
diff --git a/runsc/sandbox/BUILD b/runsc/sandbox/BUILD
index c0de9a28f..7fdceaab6 100644
--- a/runsc/sandbox/BUILD
+++ b/runsc/sandbox/BUILD
@@ -9,7 +9,7 @@ go_library(
"network_unsafe.go",
"sandbox.go",
],
- importpath = "gvisor.googlesource.com/gvisor/runsc/sandbox",
+ importpath = "gvisor.dev/gvisor/runsc/sandbox",
visibility = [
"//runsc:__subpackages__",
],
@@ -18,9 +18,10 @@ go_library(
"//pkg/control/server",
"//pkg/log",
"//pkg/sentry/control",
- "//pkg/sentry/platform/kvm",
+ "//pkg/sentry/platform",
"//pkg/urpc",
"//runsc/boot",
+ "//runsc/boot/platforms",
"//runsc/cgroup",
"//runsc/console",
"//runsc/specutils",
diff --git a/runsc/sandbox/network.go b/runsc/sandbox/network.go
index 1fd091514..a965a9dcb 100644
--- a/runsc/sandbox/network.go
+++ b/runsc/sandbox/network.go
@@ -27,10 +27,10 @@ import (
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/vishvananda/netlink"
"golang.org/x/sys/unix"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/urpc"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/urpc"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/specutils"
)
const (
@@ -228,7 +228,7 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, enableGSO
if err != nil {
return fmt.Errorf("getting link for interface %q: %v", iface.Name, err)
}
- link.LinkAddress = []byte(ifaceLink.Attrs().HardwareAddr)
+ link.LinkAddress = ifaceLink.Attrs().HardwareAddr
log.Debugf("Setting up network channels")
// Create the socket for the device.
diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go
index 032190636..4a11f617d 100644
--- a/runsc/sandbox/sandbox.go
+++ b/runsc/sandbox/sandbox.go
@@ -28,16 +28,17 @@ import (
"github.com/cenkalti/backoff"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/syndtr/gocapability/capability"
- "gvisor.googlesource.com/gvisor/pkg/control/client"
- "gvisor.googlesource.com/gvisor/pkg/control/server"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/control"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/kvm"
- "gvisor.googlesource.com/gvisor/pkg/urpc"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/cgroup"
- "gvisor.googlesource.com/gvisor/runsc/console"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/pkg/control/client"
+ "gvisor.dev/gvisor/pkg/control/server"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/control"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/urpc"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/boot/platforms"
+ "gvisor.dev/gvisor/runsc/cgroup"
+ "gvisor.dev/gvisor/runsc/console"
+ "gvisor.dev/gvisor/runsc/specutils"
)
// Sandbox wraps a sandbox process.
@@ -73,10 +74,46 @@ type Sandbox struct {
statusMu sync.Mutex
}
+// Args is used to configure a new sandbox.
+type Args struct {
+ // ID is the sandbox unique identifier.
+ ID string
+
+ // Spec is the OCI spec that describes the container.
+ Spec *specs.Spec
+
+ // BundleDir is the directory containing the container bundle.
+ BundleDir string
+
+ // ConsoleSocket is the path to a unix domain socket that will receive
+ // the console FD. It may be empty.
+ ConsoleSocket string
+
+ // UserLog is the filename to send user-visible logs to. It may be empty.
+ UserLog string
+
+ // IOFiles is the list of files that connect to a 9P endpoint for the mounts
+ // points using Gofers. They must be in the same order as mounts appear in
+ // the spec.
+ IOFiles []*os.File
+
+ // MountsFile is a file container mount information from the spec. It's
+ // equivalent to the mounts from the spec, except that all paths have been
+ // resolved to their final absolute location.
+ MountsFile *os.File
+
+ // Gcgroup is the cgroup that the sandbox is part of.
+ Cgroup *cgroup.Cgroup
+
+ // Attached indicates that the sandbox lifecycle is attached with the caller.
+ // If the caller exits, the sandbox should exit too.
+ Attached bool
+}
+
// New creates the sandbox process. The caller must call Destroy() on the
// sandbox.
-func New(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, userLog string, ioFiles []*os.File, specFile *os.File, cg *cgroup.Cgroup) (*Sandbox, error) {
- s := &Sandbox{ID: id, Cgroup: cg}
+func New(conf *boot.Config, args *Args) (*Sandbox, error) {
+ s := &Sandbox{ID: args.ID, Cgroup: args.Cgroup}
// The Cleanup object cleans up partially created sandboxes when an error
// occurs. Any errors occurring during cleanup itself are ignored.
c := specutils.MakeCleanup(func() {
@@ -93,7 +130,7 @@ func New(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocke
defer clientSyncFile.Close()
// Create the sandbox process.
- err = s.createSandboxProcess(spec, conf, bundleDir, consoleSocket, userLog, ioFiles, specFile, sandboxSyncFile)
+ err = s.createSandboxProcess(conf, args, sandboxSyncFile)
// sandboxSyncFile has to be closed to be able to detect when the sandbox
// process exits unexpectedly.
sandboxSyncFile.Close()
@@ -291,7 +328,7 @@ func (s *Sandbox) connError(err error) error {
// createSandboxProcess starts the sandbox as a subprocess by running the "boot"
// command, passing in the bundle dir.
-func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, userLog string, ioFiles []*os.File, mountsFile, startSyncFile *os.File) error {
+func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncFile *os.File) error {
// nextFD is used to get unused FDs that we can pass to the sandbox. It
// starts at 3 because 0, 1, and 2 are taken by stdin/out/err.
nextFD := 3
@@ -327,7 +364,7 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
// Add the "boot" command to the args.
//
// All flags after this must be for the boot command
- cmd.Args = append(cmd.Args, "boot", "--bundle="+bundleDir)
+ cmd.Args = append(cmd.Args, "boot", "--bundle="+args.BundleDir)
// Create a socket for the control server and donate it to the sandbox.
addr := boot.ControlSocketAddr(s.ID)
@@ -342,12 +379,12 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
cmd.Args = append(cmd.Args, "--controller-fd="+strconv.Itoa(nextFD))
nextFD++
- defer mountsFile.Close()
- cmd.ExtraFiles = append(cmd.ExtraFiles, mountsFile)
+ defer args.MountsFile.Close()
+ cmd.ExtraFiles = append(cmd.ExtraFiles, args.MountsFile)
cmd.Args = append(cmd.Args, "--mounts-fd="+strconv.Itoa(nextFD))
nextFD++
- specFile, err := specutils.OpenSpec(bundleDir)
+ specFile, err := specutils.OpenSpec(args.BundleDir)
if err != nil {
return err
}
@@ -361,7 +398,7 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
nextFD++
// If there is a gofer, sends all socket ends to the sandbox.
- for _, f := range ioFiles {
+ for _, f := range args.IOFiles {
defer f.Close()
cmd.ExtraFiles = append(cmd.ExtraFiles, f)
cmd.Args = append(cmd.Args, "--io-fds="+strconv.Itoa(nextFD))
@@ -389,23 +426,22 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
// If the console control socket file is provided, then create a new
// pty master/slave pair and set the TTY on the sandbox process.
- if consoleSocket != "" {
+ if args.ConsoleSocket != "" {
cmd.Args = append(cmd.Args, "--console=true")
// console.NewWithSocket will send the master on the given
// socket, and return the slave.
- tty, err := console.NewWithSocket(consoleSocket)
+ tty, err := console.NewWithSocket(args.ConsoleSocket)
if err != nil {
- return fmt.Errorf("setting up console with socket %q: %v", consoleSocket, err)
+ return fmt.Errorf("setting up console with socket %q: %v", args.ConsoleSocket, err)
}
defer tty.Close()
// Set the TTY as a controlling TTY on the sandbox process.
- // Note that the Ctty field must be the FD of the TTY in the
- // *new* process, not this process. Since we are about to
- // assign the TTY to nextFD, we can use that value here.
- // stdin, we can use FD 0 here.
cmd.SysProcAttr.Setctty = true
+ // The Ctty FD must be the FD in the child process's FD table,
+ // which will be nextFD in this case.
+ // See https://github.com/golang/go/issues/29458.
cmd.SysProcAttr.Ctty = nextFD
// Pass the tty as all stdio fds to sandbox.
@@ -456,7 +492,7 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
{Type: specs.UTSNamespace},
}
- if conf.Platform == boot.PlatformPtrace {
+ if conf.Platform == platforms.Ptrace {
// TODO(b/75837838): Also set a new PID namespace so that we limit
// access to other host processes.
log.Infof("Sandbox will be started in the current PID namespace")
@@ -469,7 +505,7 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
// Joins the network namespace if network is enabled. the sandbox talks
// directly to the host network, which may have been configured in the
// namespace.
- if ns, ok := specutils.GetNS(specs.NetworkNamespace, spec); ok && conf.Network != boot.NetworkNone {
+ if ns, ok := specutils.GetNS(specs.NetworkNamespace, args.Spec); ok && conf.Network != boot.NetworkNone {
log.Infof("Sandbox will be started in the container's network namespace: %+v", ns)
nss = append(nss, ns)
} else if conf.Network == boot.NetworkHost {
@@ -483,10 +519,10 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
// inside the user namespace specified in the spec or the current namespace
// if none is configured.
if conf.Network == boot.NetworkHost {
- if userns, ok := specutils.GetNS(specs.UserNamespace, spec); ok {
+ if userns, ok := specutils.GetNS(specs.UserNamespace, args.Spec); ok {
log.Infof("Sandbox will be started in container's user namespace: %+v", userns)
nss = append(nss, userns)
- specutils.SetUIDGIDMappings(cmd, spec)
+ specutils.SetUIDGIDMappings(cmd, args.Spec)
} else {
log.Infof("Sandbox will be started in the current user namespace")
}
@@ -515,46 +551,64 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
} else if specutils.HasCapabilities(capability.CAP_SETUID, capability.CAP_SETGID) {
log.Infof("Sandbox will be started in new user namespace")
nss = append(nss, specs.LinuxNamespace{Type: specs.UserNamespace})
+ cmd.Args = append(cmd.Args, "--setup-root")
- // Map nobody in the new namespace to nobody in the parent namespace.
- //
- // A sandbox process will construct an empty
- // root for itself, so it has to have the CAP_SYS_ADMIN
- // capability.
- //
- // FIXME(b/122554829): The current implementations of
- // os/exec doesn't allow to set ambient capabilities if
- // a process is started in a new user namespace. As a
- // workaround, we start the sandbox process with the 0
- // UID and then it constructs a chroot and sets UID to
- // nobody. https://github.com/golang/go/issues/2315
- const nobody = 65534
- cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
- {
- ContainerID: int(0),
- HostID: int(nobody - 1),
- Size: int(1),
- },
- {
- ContainerID: int(nobody),
- HostID: int(nobody),
- Size: int(1),
- },
- }
- cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
- {
- ContainerID: int(nobody),
- HostID: int(nobody),
- Size: int(1),
- },
+ if conf.Rootless {
+ log.Infof("Rootless mode: sandbox will run as root inside user namespace, mapped to the current user, uid: %d, gid: %d", os.Getuid(), os.Getgid())
+ cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
+ {
+ ContainerID: 0,
+ HostID: os.Getuid(),
+ Size: 1,
+ },
+ }
+ cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
+ {
+ ContainerID: 0,
+ HostID: os.Getgid(),
+ Size: 1,
+ },
+ }
+ cmd.SysProcAttr.Credential = &syscall.Credential{Uid: 0, Gid: 0}
+
+ } else {
+ // Map nobody in the new namespace to nobody in the parent namespace.
+ //
+ // A sandbox process will construct an empty
+ // root for itself, so it has to have the CAP_SYS_ADMIN
+ // capability.
+ //
+ // FIXME(b/122554829): The current implementations of
+ // os/exec doesn't allow to set ambient capabilities if
+ // a process is started in a new user namespace. As a
+ // workaround, we start the sandbox process with the 0
+ // UID and then it constructs a chroot and sets UID to
+ // nobody. https://github.com/golang/go/issues/2315
+ const nobody = 65534
+ cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
+ {
+ ContainerID: 0,
+ HostID: nobody - 1,
+ Size: 1,
+ },
+ {
+ ContainerID: nobody,
+ HostID: nobody,
+ Size: 1,
+ },
+ }
+ cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
+ {
+ ContainerID: nobody,
+ HostID: nobody,
+ Size: 1,
+ },
+ }
+
+ // Set credentials to run as user and group nobody.
+ cmd.SysProcAttr.Credential = &syscall.Credential{Uid: 0, Gid: nobody}
}
- // Set credentials to run as user and group nobody.
- cmd.SysProcAttr.Credential = &syscall.Credential{
- Uid: 0,
- Gid: nobody,
- }
- cmd.Args = append(cmd.Args, "--setup-root")
} else {
return fmt.Errorf("can't run sandbox process as user nobody since we don't have CAP_SETUID or CAP_SETGID")
}
@@ -580,8 +634,8 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
}
}
- if userLog != "" {
- f, err := os.OpenFile(userLog, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0664)
+ if args.UserLog != "" {
+ f, err := os.OpenFile(args.UserLog, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0664)
if err != nil {
return fmt.Errorf("opening compat log file: %v", err)
}
@@ -600,6 +654,11 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
log.Debugf("Donating FD %d: %q", i+3, f.Name())
}
+ if args.Attached {
+ // Kill sandbox if parent process exits in attached mode.
+ cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL
+ }
+
log.Debugf("Starting sandbox: %s %v", binPath, cmd.Args)
log.Debugf("SysProcAttr: %+v", cmd.SysProcAttr)
if err := specutils.StartInNS(cmd, nss); err != nil {
@@ -902,7 +961,7 @@ func (s *Sandbox) StartTrace(f *os.File) error {
return nil
}
-// StopTrace stops a previously started trace..
+// StopTrace stops a previously started trace.
func (s *Sandbox) StopTrace() error {
log.Debugf("Trace stop %q", s.ID)
conn, err := s.sandboxConnect()
@@ -917,6 +976,21 @@ func (s *Sandbox) StopTrace() error {
return nil
}
+// ChangeLogging changes logging options.
+func (s *Sandbox) ChangeLogging(args control.LoggingArgs) error {
+ log.Debugf("Change logging start %q", s.ID)
+ conn, err := s.sandboxConnect()
+ if err != nil {
+ return err
+ }
+ defer conn.Close()
+
+ if err := conn.Call(boot.ChangeLogging, &args, nil); err != nil {
+ return fmt.Errorf("changing sandbox %q logging: %v", s.ID, err)
+ }
+ return nil
+}
+
// DestroyContainer destroys the given container. If it is the root container,
// then the entire sandbox is destroyed.
func (s *Sandbox) DestroyContainer(cid string) error {
@@ -973,19 +1047,15 @@ func (s *Sandbox) waitForStopped() error {
// deviceFileForPlatform opens the device file for the given platform. If the
// platform does not need a device file, then nil is returned.
-func deviceFileForPlatform(p boot.PlatformType) (*os.File, error) {
- var (
- f *os.File
- err error
- )
- switch p {
- case boot.PlatformKVM:
- f, err = kvm.OpenDevice()
- default:
- return nil, nil
+func deviceFileForPlatform(name string) (*os.File, error) {
+ p, err := platform.Lookup(name)
+ if err != nil {
+ return nil, err
}
+
+ f, err := p.OpenDevice()
if err != nil {
return nil, fmt.Errorf("opening device file for platform %q: %v", p, err)
}
- return f, err
+ return f, nil
}
diff --git a/runsc/specutils/BUILD b/runsc/specutils/BUILD
index 15476de6f..fbfb8e2f8 100644
--- a/runsc/specutils/BUILD
+++ b/runsc/specutils/BUILD
@@ -9,11 +9,8 @@ go_library(
"namespace.go",
"specutils.go",
],
- importpath = "gvisor.googlesource.com/gvisor/runsc/specutils",
- visibility = [
- "//runsc:__subpackages__",
- "//test:__subpackages__",
- ],
+ importpath = "gvisor.dev/gvisor/runsc/specutils",
+ visibility = ["//:sandbox"],
deps = [
"//pkg/abi/linux",
"//pkg/log",
diff --git a/runsc/specutils/fs.go b/runsc/specutils/fs.go
index 1f3afb4e4..138aa4dd1 100644
--- a/runsc/specutils/fs.go
+++ b/runsc/specutils/fs.go
@@ -16,6 +16,7 @@ package specutils
import (
"fmt"
+ "math/bits"
"path"
"syscall"
@@ -86,7 +87,7 @@ func OptionsToFlags(opts []string) uint32 {
// PropOptionsToFlags converts propagation mount options to syscall flags.
// Propagation options cannot be set other with other options and must be
-// handled separatedly.
+// handled separately.
func PropOptionsToFlags(opts []string) uint32 {
return optionsToFlags(opts, propOptionsMap)
}
@@ -105,22 +106,30 @@ func optionsToFlags(opts []string, source map[string]mapping) uint32 {
return rv
}
-// ValidateMount validates that spec mounts are correct.
+// validateMount validates that spec mounts are correct.
func validateMount(mnt *specs.Mount) error {
if !path.IsAbs(mnt.Destination) {
return fmt.Errorf("Mount.Destination must be an absolute path: %v", mnt)
}
-
if mnt.Type == "bind" {
- for _, o := range mnt.Options {
- if ContainsStr(invalidOptions, o) {
- return fmt.Errorf("mount option %q is not supported: %v", o, mnt)
- }
- _, ok1 := optionsMap[o]
- _, ok2 := propOptionsMap[o]
- if !ok1 && !ok2 {
- return fmt.Errorf("unknown mount option %q", o)
- }
+ return ValidateMountOptions(mnt.Options)
+ }
+ return nil
+}
+
+// ValidateMountOptions validates that mount options are correct.
+func ValidateMountOptions(opts []string) error {
+ for _, o := range opts {
+ if ContainsStr(invalidOptions, o) {
+ return fmt.Errorf("mount option %q is not supported", o)
+ }
+ _, ok1 := optionsMap[o]
+ _, ok2 := propOptionsMap[o]
+ if !ok1 && !ok2 {
+ return fmt.Errorf("unknown mount option %q", o)
+ }
+ if err := validatePropagation(o); err != nil {
+ return err
}
}
return nil
@@ -133,5 +142,14 @@ func validateRootfsPropagation(opt string) error {
if flags&(syscall.MS_SLAVE|syscall.MS_PRIVATE) == 0 {
return fmt.Errorf("root mount propagation option must specify private or slave: %q", opt)
}
+ return validatePropagation(opt)
+}
+
+func validatePropagation(opt string) error {
+ flags := PropOptionsToFlags([]string{opt})
+ exclusive := flags & (syscall.MS_SLAVE | syscall.MS_PRIVATE | syscall.MS_SHARED | syscall.MS_UNBINDABLE)
+ if bits.OnesCount32(exclusive) > 1 {
+ return fmt.Errorf("mount propagation options are mutually exclusive: %q", opt)
+ }
return nil
}
diff --git a/runsc/specutils/namespace.go b/runsc/specutils/namespace.go
index 7d194335c..d441419cb 100644
--- a/runsc/specutils/namespace.go
+++ b/runsc/specutils/namespace.go
@@ -25,7 +25,7 @@ import (
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/syndtr/gocapability/capability"
"golang.org/x/sys/unix"
- "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/log"
)
// nsCloneFlag returns the clone flag that can be used to set a namespace of
@@ -204,7 +204,7 @@ func SetUIDGIDMappings(cmd *exec.Cmd, s *specs.Spec) {
}
}
-// HasCapabilities returns true if the user has all capabilties in 'cs'.
+// HasCapabilities returns true if the user has all capabilities in 'cs'.
func HasCapabilities(cs ...capability.Cap) bool {
caps, err := capability.NewPid2(os.Getpid())
if err != nil {
@@ -220,3 +220,55 @@ func HasCapabilities(cs ...capability.Cap) bool {
}
return true
}
+
+// MaybeRunAsRoot ensures the process runs with capabilities needed to create a
+// sandbox, e.g. CAP_SYS_ADMIN, CAP_SYS_CHROOT, etc. If capabilities are needed,
+// it will create a new user namespace and re-execute the process as root
+// inside the namespace with the same arguments and environment.
+//
+// This function returns immediately when no new capability is needed. If
+// another process is executed, it returns straight from here with the same exit
+// code as the child.
+func MaybeRunAsRoot() error {
+ if HasCapabilities(capability.CAP_SYS_ADMIN, capability.CAP_SYS_CHROOT, capability.CAP_SETUID, capability.CAP_SETGID) {
+ return nil
+ }
+
+ // Current process doesn't have required capabilities, create user namespace
+ // and run as root inside the namespace to acquire capabilities.
+ log.Infof("*** Re-running as root in new user namespace ***")
+
+ cmd := exec.Command("/proc/self/exe", os.Args[1:]...)
+
+ cmd.SysProcAttr = &syscall.SysProcAttr{
+ Cloneflags: syscall.CLONE_NEWUSER | syscall.CLONE_NEWNS,
+ // Set current user/group as root inside the namespace. Since we may not
+ // have CAP_SETUID/CAP_SETGID, just map root to the current user/group.
+ UidMappings: []syscall.SysProcIDMap{
+ {ContainerID: 0, HostID: os.Getuid(), Size: 1},
+ },
+ GidMappings: []syscall.SysProcIDMap{
+ {ContainerID: 0, HostID: os.Getgid(), Size: 1},
+ },
+ Credential: &syscall.Credential{Uid: 0, Gid: 0},
+ GidMappingsEnableSetgroups: false,
+ }
+
+ cmd.Env = os.Environ()
+ cmd.Stdin = os.Stdin
+ cmd.Stdout = os.Stdout
+ cmd.Stderr = os.Stderr
+ if err := cmd.Run(); err != nil {
+ if exit, ok := err.(*exec.ExitError); ok {
+ if ws, ok := exit.Sys().(syscall.WaitStatus); ok {
+ os.Exit(ws.ExitStatus())
+ }
+ log.Warningf("No wait status provided, exiting with -1: %v", err)
+ os.Exit(-1)
+ }
+ return fmt.Errorf("re-executing self: %v", err)
+ }
+ // Child completed with success.
+ os.Exit(0)
+ panic("unreachable")
+}
diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go
index 2888f55db..0b40e38a3 100644
--- a/runsc/specutils/specutils.go
+++ b/runsc/specutils/specutils.go
@@ -29,9 +29,9 @@ import (
"github.com/cenkalti/backoff"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
)
// ExePath must point to runsc binary, which is normally the same binary. It's
@@ -394,7 +394,7 @@ func WaitForReady(pid int, timeout time.Duration, ready func() (bool, error)) er
// DebugLogFile opens a log file using 'logPattern' as location. If 'logPattern'
// ends with '/', it's used as a directory with default file name.
-// 'logPattern' can contain variables that are substitued:
+// 'logPattern' can contain variables that are substituted:
// - %TIMESTAMP%: is replaced with a timestamp using the following format:
// <yyyymmdd-hhmmss.uuuuuu>
// - %COMMAND%: is replaced with 'command'
diff --git a/runsc/test/BUILD b/runsc/test/BUILD
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/runsc/test/BUILD
diff --git a/runsc/test/build_defs.bzl b/runsc/test/build_defs.bzl
new file mode 100644
index 000000000..ac28cc037
--- /dev/null
+++ b/runsc/test/build_defs.bzl
@@ -0,0 +1,19 @@
+"""Defines a rule for runsc test targets."""
+
+load("@io_bazel_rules_go//go:def.bzl", _go_test = "go_test")
+
+# runtime_test is a macro that will create targets to run the given test target
+# with different runtime options.
+def runtime_test(**kwargs):
+ """Runs the given test target with different runtime options."""
+ name = kwargs["name"]
+ _go_test(**kwargs)
+ kwargs["name"] = name + "_hostnet"
+ kwargs["args"] = ["--runtime-type=hostnet"]
+ _go_test(**kwargs)
+ kwargs["name"] = name + "_kvm"
+ kwargs["args"] = ["--runtime-type=kvm"]
+ _go_test(**kwargs)
+ kwargs["name"] = name + "_overlay"
+ kwargs["args"] = ["--runtime-type=overlay"]
+ _go_test(**kwargs)
diff --git a/runsc/test/image/BUILD b/runsc/test/image/BUILD
index e8b629c6a..58758fde5 100644
--- a/runsc/test/image/BUILD
+++ b/runsc/test/image/BUILD
@@ -1,8 +1,9 @@
-load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+load("//runsc/test:build_defs.bzl", "runtime_test")
package(licenses = ["notice"])
-go_test(
+runtime_test(
name = "image_test",
size = "large",
srcs = [
@@ -26,5 +27,5 @@ go_test(
go_library(
name = "image",
srcs = ["image.go"],
- importpath = "gvisor.googlesource.com/gvisor/runsc/test/image",
+ importpath = "gvisor.dev/gvisor/runsc/test/image",
)
diff --git a/runsc/test/image/image_test.go b/runsc/test/image/image_test.go
index b969731b0..ddaa2c13b 100644
--- a/runsc/test/image/image_test.go
+++ b/runsc/test/image/image_test.go
@@ -32,7 +32,7 @@ import (
"testing"
"time"
- "gvisor.googlesource.com/gvisor/runsc/test/testutil"
+ "gvisor.dev/gvisor/runsc/test/testutil"
)
func TestHelloWorld(t *testing.T) {
@@ -209,11 +209,14 @@ func TestMysql(t *testing.T) {
}
func TestPythonHello(t *testing.T) {
- if err := testutil.Pull("google/python-hello"); err != nil {
+ // TODO(b/136503277): Once we have more complete python runtime tests,
+ // we can drop this one.
+ const img = "gcr.io/gvisor-presubmit/python-hello"
+ if err := testutil.Pull(img); err != nil {
t.Fatalf("docker pull failed: %v", err)
}
d := testutil.MakeDocker("python-hello-test")
- if err := d.Run("-p", "8080", "google/python-hello"); err != nil {
+ if err := d.Run("-p", "8080", img); err != nil {
t.Fatalf("docker run failed: %v", err)
}
defer d.CleanUp()
diff --git a/runsc/test/integration/BUILD b/runsc/test/integration/BUILD
index 04ed885c6..12065617c 100644
--- a/runsc/test/integration/BUILD
+++ b/runsc/test/integration/BUILD
@@ -1,8 +1,9 @@
-load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+load("//runsc/test:build_defs.bzl", "runtime_test")
package(licenses = ["notice"])
-go_test(
+runtime_test(
name = "integration_test",
size = "large",
srcs = [
@@ -25,5 +26,5 @@ go_test(
go_library(
name = "integration",
srcs = ["integration.go"],
- importpath = "gvisor.googlesource.com/gvisor/runsc/test/integration",
+ importpath = "gvisor.dev/gvisor/runsc/test/integration",
)
diff --git a/runsc/test/integration/exec_test.go b/runsc/test/integration/exec_test.go
index 7c0e61ac3..993136f96 100644
--- a/runsc/test/integration/exec_test.go
+++ b/runsc/test/integration/exec_test.go
@@ -34,8 +34,8 @@ import (
"testing"
"time"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/runsc/test/testutil"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/runsc/test/testutil"
)
func TestExecCapabilities(t *testing.T) {
diff --git a/runsc/test/integration/integration_test.go b/runsc/test/integration/integration_test.go
index c51cab3ae..7cef4b9dd 100644
--- a/runsc/test/integration/integration_test.go
+++ b/runsc/test/integration/integration_test.go
@@ -32,7 +32,7 @@ import (
"testing"
"time"
- "gvisor.googlesource.com/gvisor/runsc/test/testutil"
+ "gvisor.dev/gvisor/runsc/test/testutil"
)
// httpRequestSucceeds sends a request to a given url and checks that the status is OK.
@@ -86,16 +86,17 @@ func TestLifeCycle(t *testing.T) {
}
func TestPauseResume(t *testing.T) {
+ const img = "gcr.io/gvisor-presubmit/python-hello"
if !testutil.IsPauseResumeSupported() {
t.Log("Pause/resume is not supported, skipping test.")
return
}
- if err := testutil.Pull("google/python-hello"); err != nil {
+ if err := testutil.Pull(img); err != nil {
t.Fatal("docker pull failed:", err)
}
d := testutil.MakeDocker("pause-resume-test")
- if err := d.Run("-p", "8080", "google/python-hello"); err != nil {
+ if err := d.Run("-p", "8080", img); err != nil {
t.Fatalf("docker run failed: %v", err)
}
defer d.CleanUp()
@@ -149,15 +150,16 @@ func TestPauseResume(t *testing.T) {
}
func TestCheckpointRestore(t *testing.T) {
+ const img = "gcr.io/gvisor-presubmit/python-hello"
if !testutil.IsPauseResumeSupported() {
t.Log("Pause/resume is not supported, skipping test.")
return
}
- if err := testutil.Pull("google/python-hello"); err != nil {
+ if err := testutil.Pull(img); err != nil {
t.Fatal("docker pull failed:", err)
}
d := testutil.MakeDocker("save-restore-test")
- if err := d.Run("-p", "8080", "google/python-hello"); err != nil {
+ if err := d.Run("-p", "8080", img); err != nil {
t.Fatalf("docker run failed: %v", err)
}
defer d.CleanUp()
diff --git a/runsc/test/integration/regression_test.go b/runsc/test/integration/regression_test.go
index 80bae9970..39b30e757 100644
--- a/runsc/test/integration/regression_test.go
+++ b/runsc/test/integration/regression_test.go
@@ -18,7 +18,7 @@ import (
"strings"
"testing"
- "gvisor.googlesource.com/gvisor/runsc/test/testutil"
+ "gvisor.dev/gvisor/runsc/test/testutil"
)
// Test that UDS can be created using overlay when parent directory is in lower
diff --git a/runsc/test/root/BUILD b/runsc/test/root/BUILD
index 7ded78baa..500ef7b8e 100644
--- a/runsc/test/root/BUILD
+++ b/runsc/test/root/BUILD
@@ -5,7 +5,7 @@ package(licenses = ["notice"])
go_library(
name = "root",
srcs = ["root.go"],
- importpath = "gvisor.googlesource.com/gvisor/runsc/test/root",
+ importpath = "gvisor.dev/gvisor/runsc/test/root",
)
go_test(
diff --git a/runsc/test/root/cgroup_test.go b/runsc/test/root/cgroup_test.go
index edb6dee1d..5392dc6e0 100644
--- a/runsc/test/root/cgroup_test.go
+++ b/runsc/test/root/cgroup_test.go
@@ -25,8 +25,8 @@ import (
"strings"
"testing"
- "gvisor.googlesource.com/gvisor/runsc/cgroup"
- "gvisor.googlesource.com/gvisor/runsc/test/testutil"
+ "gvisor.dev/gvisor/runsc/cgroup"
+ "gvisor.dev/gvisor/runsc/test/testutil"
)
func verifyPid(pid int, path string) error {
diff --git a/runsc/test/root/chroot_test.go b/runsc/test/root/chroot_test.go
index da2f473b9..d0f236580 100644
--- a/runsc/test/root/chroot_test.go
+++ b/runsc/test/root/chroot_test.go
@@ -31,8 +31,8 @@ import (
"testing"
"github.com/syndtr/gocapability/capability"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
- "gvisor.googlesource.com/gvisor/runsc/test/testutil"
+ "gvisor.dev/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/runsc/test/testutil"
)
// TestChroot verifies that the sandbox is chroot'd and that mounts are cleaned
diff --git a/runsc/test/root/crictl_test.go b/runsc/test/root/crictl_test.go
index 3cc176104..515ae2df1 100644
--- a/runsc/test/root/crictl_test.go
+++ b/runsc/test/root/crictl_test.go
@@ -29,9 +29,9 @@ import (
"testing"
"time"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
- "gvisor.googlesource.com/gvisor/runsc/test/root/testdata"
- "gvisor.googlesource.com/gvisor/runsc/test/testutil"
+ "gvisor.dev/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/runsc/test/root/testdata"
+ "gvisor.dev/gvisor/runsc/test/testutil"
)
// Tests for crictl have to be run as root (rather than in a user namespace)
diff --git a/runsc/test/root/testdata/BUILD b/runsc/test/root/testdata/BUILD
index 7f272dcd3..80dc5f214 100644
--- a/runsc/test/root/testdata/BUILD
+++ b/runsc/test/root/testdata/BUILD
@@ -11,7 +11,7 @@ go_library(
"httpd_mount_paths.go",
"sandbox.go",
],
- importpath = "gvisor.googlesource.com/gvisor/runsc/test/root/testdata",
+ importpath = "gvisor.dev/gvisor/runsc/test/root/testdata",
visibility = [
"//visibility:public",
],
diff --git a/runsc/test/testutil/BUILD b/runsc/test/testutil/BUILD
index ddec81444..327e7ca4d 100644
--- a/runsc/test/testutil/BUILD
+++ b/runsc/test/testutil/BUILD
@@ -10,7 +10,7 @@ go_library(
"testutil.go",
"testutil_race.go",
],
- importpath = "gvisor.googlesource.com/gvisor/runsc/test/testutil",
+ importpath = "gvisor.dev/gvisor/runsc/test/testutil",
visibility = ["//:sandbox"],
deps = [
"//runsc/boot",
@@ -18,6 +18,5 @@ go_library(
"@com_github_cenkalti_backoff//:go_default_library",
"@com_github_kr_pty//:go_default_library",
"@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
- "@com_github_syndtr_gocapability//capability:go_default_library",
],
)
diff --git a/runsc/test/testutil/docker.go b/runsc/test/testutil/docker.go
index 81f5a9ef0..3f3e191b0 100644
--- a/runsc/test/testutil/docker.go
+++ b/runsc/test/testutil/docker.go
@@ -15,6 +15,7 @@
package testutil
import (
+ "flag"
"fmt"
"io/ioutil"
"log"
@@ -30,10 +31,15 @@ import (
"github.com/kr/pty"
)
+var runtimeType = flag.String("runtime-type", "", "specify which runtime to use: kvm, hostnet, overlay")
+
func getRuntime() string {
r, ok := os.LookupEnv("RUNSC_RUNTIME")
if !ok {
- return "runsc-test"
+ r = "runsc-test"
+ }
+ if *runtimeType != "" {
+ r += "-" + *runtimeType
}
return r
}
@@ -197,7 +203,7 @@ func (d *Docker) Stop() error {
}
// Run calls 'docker run' with the arguments provided. The container starts
-// running in the backgroud and the call returns immediately.
+// running in the background and the call returns immediately.
func (d *Docker) Run(args ...string) error {
a := []string{"run", "--runtime", d.Runtime, "--name", d.Name, "-d"}
a = append(a, args...)
diff --git a/runsc/test/testutil/testutil.go b/runsc/test/testutil/testutil.go
index 727b648a6..a98675bfc 100644
--- a/runsc/test/testutil/testutil.go
+++ b/runsc/test/testutil/testutil.go
@@ -30,7 +30,6 @@ import (
"os/exec"
"os/signal"
"path/filepath"
- "runtime"
"strings"
"sync"
"sync/atomic"
@@ -39,9 +38,8 @@ import (
"github.com/cenkalti/backoff"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "github.com/syndtr/gocapability/capability"
- "gvisor.googlesource.com/gvisor/runsc/boot"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/runsc/boot"
+ "gvisor.dev/gvisor/runsc/specutils"
)
func init() {
@@ -134,6 +132,7 @@ func TestConfig() *boot.Config {
LogPackets: true,
Network: boot.NetworkNone,
Strace: true,
+ Platform: "ptrace",
FileAccess: boot.FileAccessExclusive,
TestOnlyAllowRunAsCurrentUserWithoutChroot: true,
NumNetworkChannels: 1,
@@ -284,54 +283,6 @@ func WaitForHTTP(port int, timeout time.Duration) error {
return Poll(cb, timeout)
}
-// RunAsRoot ensures the test runs with CAP_SYS_ADMIN and CAP_SYS_CHROOT. If
-// needed it will create a new user namespace and re-execute the test as root
-// inside of the namespace. This function returns when it's running as root. If
-// it needs to create another process, it will exit from there and not return.
-func RunAsRoot() {
- if specutils.HasCapabilities(capability.CAP_SYS_ADMIN, capability.CAP_SYS_CHROOT) {
- return
- }
-
- fmt.Println("*** Re-running test as root in new user namespace ***")
-
- // Current process doesn't have CAP_SYS_ADMIN, create user namespace and run
- // as root inside that namespace to get it.
- runtime.LockOSThread()
- defer runtime.UnlockOSThread()
-
- cmd := exec.Command("/proc/self/exe", os.Args[1:]...)
- cmd.SysProcAttr = &syscall.SysProcAttr{
- Cloneflags: syscall.CLONE_NEWUSER | syscall.CLONE_NEWNS,
- // Set current user/group as root inside the namespace.
- UidMappings: []syscall.SysProcIDMap{
- {ContainerID: 0, HostID: os.Getuid(), Size: 1},
- },
- GidMappings: []syscall.SysProcIDMap{
- {ContainerID: 0, HostID: os.Getgid(), Size: 1},
- },
- GidMappingsEnableSetgroups: false,
- Credential: &syscall.Credential{
- Uid: 0,
- Gid: 0,
- },
- }
- cmd.Env = os.Environ()
- cmd.Stdin = os.Stdin
- cmd.Stdout = os.Stdout
- cmd.Stderr = os.Stderr
- if err := cmd.Run(); err != nil {
- if exit, ok := err.(*exec.ExitError); ok {
- if ws, ok := exit.Sys().(syscall.WaitStatus); ok {
- os.Exit(ws.ExitStatus())
- }
- os.Exit(-1)
- }
- panic(fmt.Sprint("error running child process:", err.Error()))
- }
- os.Exit(0)
-}
-
// Reaper reaps child processes.
type Reaper struct {
// mu protects ch, which will be nil if the reaper is not running.
diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD
index 731e2aa85..4ac511740 100644
--- a/test/syscalls/BUILD
+++ b/test/syscalls/BUILD
@@ -255,7 +255,7 @@ syscall_test(test = "//test/syscalls/linux:pause_test")
syscall_test(
size = "large",
- add_overlay = False, # TODO(gvisor.dev/issue/318): enable when fixed.
+ add_overlay = True,
shard_count = 5,
test = "//test/syscalls/linux:pipe_test",
)
diff --git a/test/syscalls/gtest/BUILD b/test/syscalls/gtest/BUILD
index 22e061652..9293f25cb 100644
--- a/test/syscalls/gtest/BUILD
+++ b/test/syscalls/gtest/BUILD
@@ -5,7 +5,7 @@ package(licenses = ["notice"])
go_library(
name = "gtest",
srcs = ["gtest.go"],
- importpath = "gvisor.googlesource.com/gvisor/test/syscalls/gtest",
+ importpath = "gvisor.dev/gvisor/test/syscalls/gtest",
visibility = [
"//test:__subpackages__",
],
diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD
index 9bafc6e4f..4735284ba 100644
--- a/test/syscalls/linux/BUILD
+++ b/test/syscalls/linux/BUILD
@@ -42,6 +42,7 @@ cc_binary(
name = "exec_state_workload",
testonly = 1,
srcs = ["exec_state_workload.cc"],
+ deps = ["@com_google_absl//absl/strings"],
)
sh_binary(
@@ -984,6 +985,7 @@ cc_binary(
"//test/util:file_descriptor",
"//test/util:logging",
"//test/util:memory_util",
+ "//test/util:multiprocess_util",
"//test/util:posix_error",
"//test/util:temp_path",
"//test/util:test_main",
@@ -1175,6 +1177,7 @@ cc_binary(
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
+ "//test/util:thread_util",
"@com_google_absl//absl/strings",
"@com_google_googletest//:gtest",
],
@@ -1909,6 +1912,7 @@ cc_library(
":unix_domain_socket_test_util",
"//test/util:test_util",
"@com_google_absl//absl/strings",
+ "@com_google_absl//absl/strings:str_format",
"@com_google_googletest//:gtest",
],
alwayslink = 1,
@@ -2427,6 +2431,7 @@ cc_binary(
"//test/util:file_descriptor",
"//test/util:test_main",
"//test/util:test_util",
+ "@com_google_absl//absl/strings:str_format",
"@com_google_googletest//:gtest",
],
)
@@ -2936,6 +2941,8 @@ cc_binary(
testonly = 1,
srcs = ["tcp_socket.cc"],
linkstatic = 1,
+ # FIXME(b/135470853)
+ tags = ["flaky"],
deps = [
":socket_test_util",
"//test/util:file_descriptor",
@@ -2994,6 +3001,8 @@ cc_binary(
testonly = 1,
srcs = ["timers.cc"],
linkstatic = 1,
+ # FIXME(b/136599201)
+ tags = ["flaky"],
deps = [
"//test/util:cleanup",
"//test/util:logging",
@@ -3336,3 +3345,18 @@ cc_binary(
"@com_google_googletest//:gtest",
],
)
+
+cc_binary(
+ name = "proc_net_tcp_test",
+ testonly = 1,
+ srcs = ["proc_net_tcp.cc"],
+ linkstatic = 1,
+ deps = [
+ ":ip_socket_test_util",
+ "//test/util:file_descriptor",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_absl//absl/strings",
+ "@com_google_googletest//:gtest",
+ ],
+)
diff --git a/test/syscalls/linux/chmod.cc b/test/syscalls/linux/chmod.cc
index 79e98597f..7e918b9b2 100644
--- a/test/syscalls/linux/chmod.cc
+++ b/test/syscalls/linux/chmod.cc
@@ -140,7 +140,8 @@ TEST(ChmodTest, FchmodatFile) {
SyscallSucceeds());
ASSERT_THAT(
- fchmodat(parent_fd, std::string(Basename(temp_file.path())).c_str(), 0444, 0),
+ fchmodat(parent_fd, std::string(Basename(temp_file.path())).c_str(), 0444,
+ 0),
SyscallSucceeds());
EXPECT_THAT(close(parent_fd), SyscallSucceeds());
@@ -165,8 +166,9 @@ TEST(ChmodTest, FchmodatDir) {
SyscallSucceeds());
EXPECT_THAT(close(fd), SyscallSucceeds());
- ASSERT_THAT(fchmodat(parent_fd, std::string(Basename(dir.path())).c_str(), 0, 0),
- SyscallSucceeds());
+ ASSERT_THAT(
+ fchmodat(parent_fd, std::string(Basename(dir.path())).c_str(), 0, 0),
+ SyscallSucceeds());
EXPECT_THAT(close(parent_fd), SyscallSucceeds());
EXPECT_THAT(open(dir.path().c_str(), O_RDONLY | O_DIRECTORY),
diff --git a/test/syscalls/linux/chown.cc b/test/syscalls/linux/chown.cc
index eb1762ddf..2e82f0b3a 100644
--- a/test/syscalls/linux/chown.cc
+++ b/test/syscalls/linux/chown.cc
@@ -186,10 +186,10 @@ INSTANTIATE_TEST_SUITE_P(
return errorFromReturn("fchownat-fd", rc);
},
[](const std::string& path, uid_t owner, gid_t group) -> PosixError {
- ASSIGN_OR_RETURN_ERRNO(
- auto dirfd, Open(std::string(Dirname(path)), O_DIRECTORY | O_RDONLY));
- int rc = fchownat(dirfd.get(), std::string(Basename(path)).c_str(), owner,
- group, 0);
+ ASSIGN_OR_RETURN_ERRNO(auto dirfd, Open(std::string(Dirname(path)),
+ O_DIRECTORY | O_RDONLY));
+ int rc = fchownat(dirfd.get(), std::string(Basename(path)).c_str(),
+ owner, group, 0);
MaybeSave();
return errorFromReturn("fchownat-dirfd", rc);
}));
diff --git a/test/syscalls/linux/chroot.cc b/test/syscalls/linux/chroot.cc
index a4354ff62..498c45f16 100644
--- a/test/syscalls/linux/chroot.cc
+++ b/test/syscalls/linux/chroot.cc
@@ -273,7 +273,8 @@ TEST(ChrootTest, ProcMemSelfMapsNoEscapeProcOpen) {
ASSERT_GT(bytes_read, 0);
// Finally we want to make sure the maps don't contain the chroot path
- ASSERT_EQ(std::string(buf, bytes_read).find(temp_dir.path()), std::string::npos);
+ ASSERT_EQ(std::string(buf, bytes_read).find(temp_dir.path()),
+ std::string::npos);
}
// Test that mounts outside the chroot will not appear in /proc/self/mounts or
diff --git a/test/syscalls/linux/eventfd.cc b/test/syscalls/linux/eventfd.cc
index 5e5c39d44..367682c3d 100644
--- a/test/syscalls/linux/eventfd.cc
+++ b/test/syscalls/linux/eventfd.cc
@@ -53,9 +53,9 @@ TEST(EventfdTest, Nonblock) {
void* read_three_times(void* arg) {
int efd = *reinterpret_cast<int*>(arg);
uint64_t l;
- read(efd, &l, sizeof(l));
- read(efd, &l, sizeof(l));
- read(efd, &l, sizeof(l));
+ EXPECT_THAT(read(efd, &l, sizeof(l)), SyscallSucceedsWithValue(sizeof(l)));
+ EXPECT_THAT(read(efd, &l, sizeof(l)), SyscallSucceedsWithValue(sizeof(l)));
+ EXPECT_THAT(read(efd, &l, sizeof(l)), SyscallSucceedsWithValue(sizeof(l)));
return nullptr;
}
@@ -160,7 +160,8 @@ TEST(EventfdTest, NotifyNonZero_NoRandomSave) {
ScopedThread t([&efd] {
sleep(5);
uint64_t val = 1;
- write(efd.get(), &val, sizeof(val));
+ EXPECT_THAT(write(efd.get(), &val, sizeof(val)),
+ SyscallSucceedsWithValue(sizeof(val)));
});
// epoll_wait should return once the thread writes.
diff --git a/test/syscalls/linux/exceptions.cc b/test/syscalls/linux/exceptions.cc
index 0da4c817d..370e85166 100644
--- a/test/syscalls/linux/exceptions.cc
+++ b/test/syscalls/linux/exceptions.cc
@@ -56,6 +56,26 @@ void inline Int3Normal() { asm(".byte 0xcd, 0x03\r\n"); }
void inline Int3Compact() { asm(".byte 0xcc\r\n"); }
+void InIOHelper(int width, int value) {
+ EXPECT_EXIT(
+ {
+ switch (width) {
+ case 1:
+ asm volatile("inb %%dx, %%al" ::"d"(value) : "%eax");
+ break;
+ case 2:
+ asm volatile("inw %%dx, %%ax" ::"d"(value) : "%eax");
+ break;
+ case 4:
+ asm volatile("inl %%dx, %%eax" ::"d"(value) : "%eax");
+ break;
+ default:
+ FAIL() << "invalid input width, only 1, 2 or 4 is allowed";
+ }
+ },
+ ::testing::KilledBySignal(SIGSEGV), "");
+}
+
TEST(ExceptionTest, Halt) {
// In order to prevent the regular handler from messing with things (and
// perhaps refaulting until some other signal occurs), we reset the handler to
@@ -87,6 +107,20 @@ TEST(ExceptionTest, DivideByZero) {
::testing::KilledBySignal(SIGFPE), "");
}
+TEST(ExceptionTest, IOAccessFault) {
+ // See above.
+ struct sigaction sa = {};
+ sa.sa_handler = SIG_DFL;
+ auto const cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGSEGV, sa));
+
+ InIOHelper(1, 0x0);
+ InIOHelper(2, 0x7);
+ InIOHelper(4, 0x6);
+ InIOHelper(1, 0xffff);
+ InIOHelper(2, 0xffff);
+ InIOHelper(4, 0xfffd);
+}
+
TEST(ExceptionTest, Alignment) {
SetAlignmentCheck();
ClearAlignmentCheck();
diff --git a/test/syscalls/linux/exec.cc b/test/syscalls/linux/exec.cc
index 06c322a99..4c7c95321 100644
--- a/test/syscalls/linux/exec.cc
+++ b/test/syscalls/linux/exec.cc
@@ -255,7 +255,8 @@ TEST(ExecDeathTest, InterpreterScriptArgNUL) {
TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
GetAbsoluteTestTmpdir(),
- absl::StrCat("#!", link.path(), " foo", std::string(1, '\0'), "bar"), 0755));
+ absl::StrCat("#!", link.path(), " foo", std::string(1, '\0'), "bar"),
+ 0755));
CheckOutput(script.path(), {script.path()}, {}, ArgEnvExitStatus(2, 0),
absl::StrCat(link.path(), "\nfoo\n", script.path(), "\n"));
diff --git a/test/syscalls/linux/exec_proc_exe_workload.cc b/test/syscalls/linux/exec_proc_exe_workload.cc
index b3fbd5042..b790fe5be 100644
--- a/test/syscalls/linux/exec_proc_exe_workload.cc
+++ b/test/syscalls/linux/exec_proc_exe_workload.cc
@@ -21,7 +21,8 @@
#include "test/util/posix_error.h"
int main(int argc, char** argv, char** envp) {
- std::string exe = gvisor::testing::ProcessExePath(getpid()).ValueOrDie();
+ std::string exe =
+ gvisor::testing::ProcessExePath(getpid()).ValueOrDie();
if (exe[0] != '/') {
std::cerr << "relative path: " << exe << std::endl;
exit(1);
diff --git a/test/syscalls/linux/exec_state_workload.cc b/test/syscalls/linux/exec_state_workload.cc
index 725c2977f..028902b14 100644
--- a/test/syscalls/linux/exec_state_workload.cc
+++ b/test/syscalls/linux/exec_state_workload.cc
@@ -19,10 +19,13 @@
#include <sys/auxv.h>
#include <sys/prctl.h>
#include <sys/time.h>
+
#include <iostream>
#include <ostream>
#include <string>
+#include "absl/strings/numbers.h"
+
// Pretty-print a sigset_t.
std::ostream& operator<<(std::ostream& out, const sigset_t& s) {
out << "{ ";
@@ -138,15 +141,14 @@ int main(int argc, char** argv) {
return 1;
}
- char* end;
- uint32_t signo = strtoul(argv[2], &end, 10);
- if (end == argv[2]) {
+ uint32_t signo;
+ if (!absl::SimpleAtoi(argv[2], &signo)) {
std::cerr << "invalid signo: " << argv[2] << std::endl;
return 1;
}
- uintptr_t handler = strtoull(argv[3], &end, 16);
- if (end == argv[3]) {
+ uintptr_t handler;
+ if (!absl::numbers_internal::safe_strtoi_base(argv[3], &handler, 16)) {
std::cerr << "invalid handler: " << std::hex << argv[3] << std::endl;
return 1;
}
@@ -160,9 +162,8 @@ int main(int argc, char** argv) {
return 1;
}
- char* end;
- uint32_t signo = strtoul(argv[2], &end, 10);
- if (end == argv[2]) {
+ uint32_t signo;
+ if (!absl::SimpleAtoi(argv[2], &signo)) {
std::cerr << "invalid signo: " << argv[2] << std::endl;
return 1;
}
@@ -176,9 +177,8 @@ int main(int argc, char** argv) {
return 1;
}
- char* end;
- uint32_t timer = strtoul(argv[2], &end, 10);
- if (end == argv[2]) {
+ uint32_t timer;
+ if (!absl::SimpleAtoi(argv[2], &timer)) {
std::cerr << "invalid signo: " << argv[2] << std::endl;
return 1;
}
diff --git a/test/syscalls/linux/file_base.h b/test/syscalls/linux/file_base.h
index b5b972c07..36efabcae 100644
--- a/test/syscalls/linux/file_base.h
+++ b/test/syscalls/linux/file_base.h
@@ -160,7 +160,7 @@ class SocketTest : public ::testing::Test {
// MatchesStringLength checks that a tuple argument of (struct iovec *, int)
// corresponding to an iovec array and its length, contains data that matches
-// the std::string length strlen.
+// the string length strlen.
MATCHER_P(MatchesStringLength, strlen, "") {
struct iovec* iovs = arg.first;
int niov = arg.second;
@@ -177,7 +177,7 @@ MATCHER_P(MatchesStringLength, strlen, "") {
// MatchesStringValue checks that a tuple argument of (struct iovec *, int)
// corresponding to an iovec array and its length, contains data that matches
-// the std::string value str.
+// the string value str.
MATCHER_P(MatchesStringValue, str, "") {
struct iovec* iovs = arg.first;
int len = strlen(str);
diff --git a/test/syscalls/linux/flock.cc b/test/syscalls/linux/flock.cc
index d89cfcbd7..b4a91455d 100644
--- a/test/syscalls/linux/flock.cc
+++ b/test/syscalls/linux/flock.cc
@@ -428,7 +428,7 @@ TEST_F(FlockTest, TestDupFdFollowedByLock) {
ASSERT_THAT(flock(fd.get(), LOCK_UN), SyscallSucceedsWithValue(0));
}
-// NOTE: These blocking tests are not perfect. Unfortunantely it's very hard to
+// NOTE: These blocking tests are not perfect. Unfortunately it's very hard to
// determine if a thread was actually blocked in the kernel so we're forced
// to use timing.
TEST_F(FlockTest, BlockingLockNoBlockingForSharedLocks_NoRandomSave) {
diff --git a/test/syscalls/linux/getdents.cc b/test/syscalls/linux/getdents.cc
index d146c8db7..8e4efa8d6 100644
--- a/test/syscalls/linux/getdents.cc
+++ b/test/syscalls/linux/getdents.cc
@@ -364,7 +364,7 @@ TYPED_TEST(GetdentsTest, PartialBuffer) {
}
// Open many file descriptors, then scan through /proc/self/fd to find and close
-// them all. (The latter is commonly used to handle races betweek fork/execve
+// them all. (The latter is commonly used to handle races between fork/execve
// and the creation of unwanted non-O_CLOEXEC file descriptors.) This tests that
// getdents iterates correctly despite mutation of /proc/self/fd.
TYPED_TEST(GetdentsTest, ProcSelfFd) {
@@ -437,6 +437,19 @@ TYPED_TEST(GetdentsTest, SeekResetsCursor) {
EXPECT_EQ(expect, this->ReadAndCountAllEntries(&dirents));
}
+// Test that getdents() after SEEK_END succeeds.
+// This is a regression test for #128.
+TYPED_TEST(GetdentsTest, Issue128ProcSeekEnd) {
+ auto fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self", O_RDONLY | O_DIRECTORY));
+ typename TestFixture::DirentBufferType dirents(256);
+
+ ASSERT_THAT(lseek(fd.get(), 0, SEEK_END), SyscallSucceeds());
+ ASSERT_THAT(RetryEINTR(syscall)(this->SyscallNum(), fd.get(), dirents.Data(),
+ dirents.Size()),
+ SyscallSucceeds());
+}
+
// Some tests using the glibc readdir interface.
TEST(ReaddirTest, OpenDir) {
DIR* dev;
diff --git a/test/syscalls/linux/inotify.cc b/test/syscalls/linux/inotify.cc
index 6a3539e22..7384c27dc 100644
--- a/test/syscalls/linux/inotify.cc
+++ b/test/syscalls/linux/inotify.cc
@@ -122,8 +122,8 @@ std::string DumpEvents(const std::vector<Event>& events, int indent_level) {
(events.size() > 1) ? "s" : "");
int i = 0;
for (const Event& ev : events) {
- ss << StreamFormat("%sevents[%d]: %s\n", std::string(indent_level, '\t'), i++,
- DumpEvent(ev));
+ ss << StreamFormat("%sevents[%d]: %s\n", std::string(indent_level, '\t'),
+ i++, DumpEvent(ev));
}
return ss.str();
}
@@ -295,10 +295,10 @@ PosixErrorOr<std::vector<Event>> DrainEvents(int fd) {
if (event.len > 0) {
TEST_CHECK(static_cast<int>(sizeof(struct inotify_event) + event.len) <=
readlen);
- ev.name =
- std::string(cursor + offsetof(struct inotify_event, name)); // NOLINT
+ ev.name = std::string(cursor +
+ offsetof(struct inotify_event, name)); // NOLINT
// Name field should always be smaller than event.len, otherwise we have
- // a buffer overflow. The two sizes aren't equal because the std::string
+ // a buffer overflow. The two sizes aren't equal because the string
// constructor will stop at the first null byte, while event.name may be
// padded up to event.len using multiple null bytes.
TEST_CHECK(ev.name.size() <= event.len);
@@ -319,7 +319,8 @@ PosixErrorOr<FileDescriptor> InotifyInit1(int flags) {
return FileDescriptor(fd);
}
-PosixErrorOr<int> InotifyAddWatch(int fd, const std::string& path, uint32_t mask) {
+PosixErrorOr<int> InotifyAddWatch(int fd, const std::string& path,
+ uint32_t mask) {
int wd;
EXPECT_THAT(wd = inotify_add_watch(fd, path.c_str(), mask),
SyscallSucceeds());
@@ -980,8 +981,8 @@ TEST(Inotify, WatchOnRelativePath) {
EXPECT_THAT(chdir(root.path().c_str()), SyscallSucceeds());
// Add a watch on file1 with a relative path.
- const int wd = ASSERT_NO_ERRNO_AND_VALUE(
- InotifyAddWatch(fd.get(), std::string(Basename(file1.path())), IN_ALL_EVENTS));
+ const int wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch(
+ fd.get(), std::string(Basename(file1.path())), IN_ALL_EVENTS));
// Perform a read on file1, this should generate an IN_ACCESS event.
char c;
diff --git a/test/syscalls/linux/ioctl.cc b/test/syscalls/linux/ioctl.cc
index c525d41d2..4948a76f0 100644
--- a/test/syscalls/linux/ioctl.cc
+++ b/test/syscalls/linux/ioctl.cc
@@ -229,6 +229,37 @@ TEST_F(IoctlTest, FIOASYNCSelfTarget2) {
EXPECT_EQ(io_received, 1);
}
+// Check that closing an FD does not result in an event.
+TEST_F(IoctlTest, FIOASYNCSelfTargetClose) {
+ // Count SIGIOs received.
+ struct sigaction sa;
+ io_received = 0;
+ sa.sa_sigaction = inc_io_handler;
+ sigfillset(&sa.sa_mask);
+ sa.sa_flags = SA_RESTART;
+ auto sa_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGIO, sa));
+
+ // Actually allow SIGIO delivery.
+ auto mask_cleanup =
+ ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGIO));
+
+ for (int i = 0; i < 2; i++) {
+ auto pair = ASSERT_NO_ERRNO_AND_VALUE(
+ UnixDomainSocketPair(SOCK_SEQPACKET).Create());
+
+ pid_t pid = getpid();
+ EXPECT_THAT(ioctl(pair->second_fd(), FIOSETOWN, &pid), SyscallSucceeds());
+
+ int set = 1;
+ EXPECT_THAT(ioctl(pair->second_fd(), FIOASYNC, &set), SyscallSucceeds());
+ }
+
+ // FIXME(b/120624367): gVisor erroneously sends SIGIO on close.
+ SKIP_IF(IsRunningOnGvisor());
+
+ EXPECT_EQ(io_received, 0);
+}
+
TEST_F(IoctlTest, FIOASYNCInvalidPID) {
auto pair =
ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create());
diff --git a/test/syscalls/linux/ip_socket_test_util.cc b/test/syscalls/linux/ip_socket_test_util.cc
index 7612919d4..5fc4e9115 100644
--- a/test/syscalls/linux/ip_socket_test_util.cc
+++ b/test/syscalls/linux/ip_socket_test_util.cc
@@ -45,70 +45,79 @@ SocketPairKind IPv6TCPAcceptBindSocketPair(int type) {
std::string description =
absl::StrCat(DescribeSocketType(type), "connected IPv6 TCP socket");
return SocketPairKind{
- description, TCPAcceptBindSocketPairCreator(AF_INET6, type | SOCK_STREAM,
- 0, /* dual_stack = */ false)};
+ description, AF_INET6, type | SOCK_STREAM, IPPROTO_TCP,
+ TCPAcceptBindSocketPairCreator(AF_INET6, type | SOCK_STREAM, 0,
+ /* dual_stack = */ false)};
}
SocketPairKind IPv4TCPAcceptBindSocketPair(int type) {
std::string description =
absl::StrCat(DescribeSocketType(type), "connected IPv4 TCP socket");
return SocketPairKind{
- description, TCPAcceptBindSocketPairCreator(AF_INET, type | SOCK_STREAM,
- 0, /* dual_stack = */ false)};
+ description, AF_INET, type | SOCK_STREAM, IPPROTO_TCP,
+ TCPAcceptBindSocketPairCreator(AF_INET, type | SOCK_STREAM, 0,
+ /* dual_stack = */ false)};
}
SocketPairKind DualStackTCPAcceptBindSocketPair(int type) {
std::string description =
absl::StrCat(DescribeSocketType(type), "connected dual stack TCP socket");
return SocketPairKind{
- description, TCPAcceptBindSocketPairCreator(AF_INET6, type | SOCK_STREAM,
- 0, /* dual_stack = */ true)};
+ description, AF_INET6, type | SOCK_STREAM, IPPROTO_TCP,
+ TCPAcceptBindSocketPairCreator(AF_INET6, type | SOCK_STREAM, 0,
+ /* dual_stack = */ true)};
}
SocketPairKind IPv6UDPBidirectionalBindSocketPair(int type) {
std::string description =
absl::StrCat(DescribeSocketType(type), "connected IPv6 UDP socket");
- return SocketPairKind{description, UDPBidirectionalBindSocketPairCreator(
- AF_INET6, type | SOCK_DGRAM, 0,
- /* dual_stack = */ false)};
+ return SocketPairKind{
+ description, AF_INET6, type | SOCK_DGRAM, IPPROTO_UDP,
+ UDPBidirectionalBindSocketPairCreator(AF_INET6, type | SOCK_DGRAM, 0,
+ /* dual_stack = */ false)};
}
SocketPairKind IPv4UDPBidirectionalBindSocketPair(int type) {
std::string description =
absl::StrCat(DescribeSocketType(type), "connected IPv4 UDP socket");
- return SocketPairKind{description, UDPBidirectionalBindSocketPairCreator(
- AF_INET, type | SOCK_DGRAM, 0,
- /* dual_stack = */ false)};
+ return SocketPairKind{
+ description, AF_INET, type | SOCK_DGRAM, IPPROTO_UDP,
+ UDPBidirectionalBindSocketPairCreator(AF_INET, type | SOCK_DGRAM, 0,
+ /* dual_stack = */ false)};
}
SocketPairKind DualStackUDPBidirectionalBindSocketPair(int type) {
std::string description =
absl::StrCat(DescribeSocketType(type), "connected dual stack UDP socket");
- return SocketPairKind{description, UDPBidirectionalBindSocketPairCreator(
- AF_INET6, type | SOCK_DGRAM, 0,
- /* dual_stack = */ true)};
+ return SocketPairKind{
+ description, AF_INET6, type | SOCK_DGRAM, IPPROTO_UDP,
+ UDPBidirectionalBindSocketPairCreator(AF_INET6, type | SOCK_DGRAM, 0,
+ /* dual_stack = */ true)};
}
SocketPairKind IPv4UDPUnboundSocketPair(int type) {
std::string description =
absl::StrCat(DescribeSocketType(type), "IPv4 UDP socket");
return SocketPairKind{
- description, UDPUnboundSocketPairCreator(AF_INET, type | SOCK_DGRAM, 0,
- /* dual_stack = */ false)};
+ description, AF_INET, type | SOCK_DGRAM, IPPROTO_UDP,
+ UDPUnboundSocketPairCreator(AF_INET, type | SOCK_DGRAM, 0,
+ /* dual_stack = */ false)};
}
SocketKind IPv4UDPUnboundSocket(int type) {
std::string description =
absl::StrCat(DescribeSocketType(type), "IPv4 UDP socket");
- return SocketKind{description, UnboundSocketCreator(
- AF_INET, type | SOCK_DGRAM, IPPROTO_UDP)};
+ return SocketKind{
+ description, AF_INET, type | SOCK_DGRAM, IPPROTO_UDP,
+ UnboundSocketCreator(AF_INET, type | SOCK_DGRAM, IPPROTO_UDP)};
}
SocketKind IPv4TCPUnboundSocket(int type) {
std::string description =
absl::StrCat(DescribeSocketType(type), "IPv4 TCP socket");
- return SocketKind{description, UnboundSocketCreator(
- AF_INET, type | SOCK_STREAM, IPPROTO_TCP)};
+ return SocketKind{
+ description, AF_INET, type | SOCK_STREAM, IPPROTO_TCP,
+ UnboundSocketCreator(AF_INET, type | SOCK_STREAM, IPPROTO_TCP)};
}
} // namespace testing
diff --git a/test/syscalls/linux/itimer.cc b/test/syscalls/linux/itimer.cc
index 57ffd1595..51ce323b9 100644
--- a/test/syscalls/linux/itimer.cc
+++ b/test/syscalls/linux/itimer.cc
@@ -197,12 +197,17 @@ int TestSIGALRMToMainThread() {
// (but don't guarantee it), so we expect to see most samples on the main
// thread.
//
+ // The number of SIGALRMs delivered to a worker should not exceed 20%
+ // of the number of total signals expected (this is somewhat arbitrary).
+ const int worker_threshold = result.expected_total / 5;
+
+ //
// Linux only guarantees timers will never expire before the requested time.
// Thus, we only check the upper bound and also it at least have one sample.
TEST_CHECK(result.main_thread_samples <= result.expected_total);
TEST_CHECK(result.main_thread_samples > 0);
for (int num : result.worker_samples) {
- TEST_CHECK_MSG(num <= 50, "worker received too many samples");
+ TEST_CHECK_MSG(num <= worker_threshold, "worker received too many samples");
}
return 0;
diff --git a/test/syscalls/linux/madvise.cc b/test/syscalls/linux/madvise.cc
index f6ad4d18b..08ff4052c 100644
--- a/test/syscalls/linux/madvise.cc
+++ b/test/syscalls/linux/madvise.cc
@@ -29,6 +29,7 @@
#include "test/util/file_descriptor.h"
#include "test/util/logging.h"
#include "test/util/memory_util.h"
+#include "test/util/multiprocess_util.h"
#include "test/util/posix_error.h"
#include "test/util/temp_path.h"
#include "test/util/test_util.h"
@@ -136,6 +137,115 @@ TEST(MadviseDontneedTest, IgnoresPermissions) {
EXPECT_THAT(madvise(m.ptr(), m.len(), MADV_DONTNEED), SyscallSucceeds());
}
+TEST(MadviseDontforkTest, AddressLength) {
+ auto m =
+ ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, MAP_PRIVATE));
+ char *addr = static_cast<char *>(m.ptr());
+
+ // Address must be page aligned.
+ EXPECT_THAT(madvise(addr + 1, kPageSize, MADV_DONTFORK),
+ SyscallFailsWithErrno(EINVAL));
+
+ // Zero length madvise always succeeds.
+ EXPECT_THAT(madvise(addr, 0, MADV_DONTFORK), SyscallSucceeds());
+
+ // Length must not roll over after rounding up.
+ size_t badlen = std::numeric_limits<std::size_t>::max() - (kPageSize / 2);
+ EXPECT_THAT(madvise(0, badlen, MADV_DONTFORK), SyscallFailsWithErrno(EINVAL));
+
+ // Length need not be page aligned - it is implicitly rounded up.
+ EXPECT_THAT(madvise(addr, 1, MADV_DONTFORK), SyscallSucceeds());
+ EXPECT_THAT(madvise(addr, kPageSize, MADV_DONTFORK), SyscallSucceeds());
+}
+
+TEST(MadviseDontforkTest, DontforkShared) {
+ // Mmap two shared file-backed pages and MADV_DONTFORK the second page.
+ TempPath f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ /* parent = */ GetAbsoluteTestTmpdir(),
+ /* content = */ std::string(kPageSize * 2, 2),
+ TempPath::kDefaultFileMode));
+ FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDWR));
+
+ Mapping m = ASSERT_NO_ERRNO_AND_VALUE(Mmap(
+ nullptr, kPageSize * 2, PROT_READ | PROT_WRITE, MAP_SHARED, fd.get(), 0));
+
+ const Mapping ms1 = Mapping(reinterpret_cast<void *>(m.addr()), kPageSize);
+ const Mapping ms2 =
+ Mapping(reinterpret_cast<void *>(m.addr() + kPageSize), kPageSize);
+ m.release();
+
+ ASSERT_THAT(madvise(ms2.ptr(), kPageSize, MADV_DONTFORK), SyscallSucceeds());
+
+ const auto rest = [&] {
+ // First page is mapped in child and modifications are visible to parent
+ // via the shared mapping.
+ TEST_CHECK(IsMapped(ms1.addr()));
+ ExpectAllMappingBytes(ms1, 2);
+ memset(ms1.ptr(), 1, kPageSize);
+ ExpectAllMappingBytes(ms1, 1);
+
+ // Second page must not be mapped in child.
+ TEST_CHECK(!IsMapped(ms2.addr()));
+ };
+
+ EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
+
+ ExpectAllMappingBytes(ms1, 1); // page contents modified by child.
+ ExpectAllMappingBytes(ms2, 2); // page contents unchanged.
+}
+
+TEST(MadviseDontforkTest, DontforkAnonPrivate) {
+ // Mmap three anonymous pages and MADV_DONTFORK the middle page.
+ Mapping m = ASSERT_NO_ERRNO_AND_VALUE(
+ MmapAnon(kPageSize * 3, PROT_READ | PROT_WRITE, MAP_PRIVATE));
+ const Mapping mp1 = Mapping(reinterpret_cast<void *>(m.addr()), kPageSize);
+ const Mapping mp2 =
+ Mapping(reinterpret_cast<void *>(m.addr() + kPageSize), kPageSize);
+ const Mapping mp3 =
+ Mapping(reinterpret_cast<void *>(m.addr() + 2 * kPageSize), kPageSize);
+ m.release();
+
+ ASSERT_THAT(madvise(mp2.ptr(), kPageSize, MADV_DONTFORK), SyscallSucceeds());
+
+ // Verify that all pages are zeroed and memset the first, second and third
+ // pages to 1, 2, and 3 respectively.
+ ExpectAllMappingBytes(mp1, 0);
+ memset(mp1.ptr(), 1, kPageSize);
+
+ ExpectAllMappingBytes(mp2, 0);
+ memset(mp2.ptr(), 2, kPageSize);
+
+ ExpectAllMappingBytes(mp3, 0);
+ memset(mp3.ptr(), 3, kPageSize);
+
+ const auto rest = [&] {
+ // Verify first page is mapped, verify its contents and then modify the
+ // page. The mapping is private so the modifications are not visible to
+ // the parent.
+ TEST_CHECK(IsMapped(mp1.addr()));
+ ExpectAllMappingBytes(mp1, 1);
+ memset(mp1.ptr(), 11, kPageSize);
+ ExpectAllMappingBytes(mp1, 11);
+
+ // Verify second page is not mapped.
+ TEST_CHECK(!IsMapped(mp2.addr()));
+
+ // Verify third page is mapped, verify its contents and then modify the
+ // page. The mapping is private so the modifications are not visible to
+ // the parent.
+ TEST_CHECK(IsMapped(mp3.addr()));
+ ExpectAllMappingBytes(mp3, 3);
+ memset(mp3.ptr(), 13, kPageSize);
+ ExpectAllMappingBytes(mp3, 13);
+ };
+ EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
+
+ // The fork and COW by child should not affect the parent mappings.
+ ExpectAllMappingBytes(mp1, 1);
+ ExpectAllMappingBytes(mp2, 2);
+ ExpectAllMappingBytes(mp3, 3);
+}
+
} // namespace
} // namespace testing
diff --git a/test/syscalls/linux/memfd.cc b/test/syscalls/linux/memfd.cc
index 3494bcd13..e57b49a4a 100644
--- a/test/syscalls/linux/memfd.cc
+++ b/test/syscalls/linux/memfd.cc
@@ -60,7 +60,8 @@ int memfd_create(const std::string& name, unsigned int flags) {
return syscall(__NR_memfd_create, name.c_str(), flags);
}
-PosixErrorOr<FileDescriptor> MemfdCreate(const std::string& name, uint32_t flags) {
+PosixErrorOr<FileDescriptor> MemfdCreate(const std::string& name,
+ uint32_t flags) {
int fd = memfd_create(name, flags);
if (fd < 0) {
return PosixError(
diff --git a/test/syscalls/linux/mknod.cc b/test/syscalls/linux/mknod.cc
index febf2eb14..4c45766c7 100644
--- a/test/syscalls/linux/mknod.cc
+++ b/test/syscalls/linux/mknod.cc
@@ -90,7 +90,7 @@ TEST(MknodTest, Fifo) {
ASSERT_THAT(stat(fifo.c_str(), &st), SyscallSucceeds());
EXPECT_TRUE(S_ISFIFO(st.st_mode));
- std::string msg = "some string";
+ std::string msg = "some std::string";
std::vector<char> buf(512);
// Read-end of the pipe.
@@ -116,7 +116,7 @@ TEST(MknodTest, FifoOtrunc) {
ASSERT_THAT(stat(fifo.c_str(), &st), SyscallSucceeds());
EXPECT_TRUE(S_ISFIFO(st.st_mode));
- std::string msg = "some string";
+ std::string msg = "some std::string";
std::vector<char> buf(512);
// Read-end of the pipe.
ScopedThread t([&fifo, &buf, &msg]() {
@@ -144,7 +144,7 @@ TEST(MknodTest, FifoTruncNoOp) {
ASSERT_THAT(stat(fifo.c_str(), &st), SyscallSucceeds());
EXPECT_TRUE(S_ISFIFO(st.st_mode));
- std::string msg = "some string";
+ std::string msg = "some std::string";
std::vector<char> buf(512);
// Read-end of the pipe.
ScopedThread t([&fifo, &buf, &msg]() {
diff --git a/test/syscalls/linux/mmap.cc b/test/syscalls/linux/mmap.cc
index 5b5b4c2e8..a112316e9 100644
--- a/test/syscalls/linux/mmap.cc
+++ b/test/syscalls/linux/mmap.cc
@@ -113,7 +113,7 @@ class MMapTest : public ::testing::Test {
size_t length_ = 0;
};
-// Matches if arg contains the same contents as std::string str.
+// Matches if arg contains the same contents as string str.
MATCHER_P(EqualsMemory, str, "") {
if (0 == memcmp(arg, str.c_str(), str.size())) {
return true;
@@ -1086,8 +1086,8 @@ TEST_F(MMapFileTest, WriteShared) {
ASSERT_THAT(Read(buf.data(), buf.size()),
SyscallSucceedsWithValue(buf.size()));
// Cast to void* to avoid EXPECT_THAT assuming buf.data() is a
- // NUL-terminated C std::string. EXPECT_THAT will try to print a char* as a C
- // std::string, possibly overruning the buffer.
+ // NUL-terminated C string. EXPECT_THAT will try to print a char* as a C
+ // string, possibly overruning the buffer.
EXPECT_THAT(reinterpret_cast<void*>(buf.data()),
EqualsMemory(std::string(kFileContents)));
}
@@ -1122,6 +1122,7 @@ TEST_F(MMapFileTest, WriteSharedBeyondEnd) {
ASSERT_THAT(Read(buf.data(), buf.size()),
SyscallSucceedsWithValue(first.size()));
// Cast to void* to avoid EXPECT_THAT assuming buf.data() is a
+ // NUL-terminated C string. EXPECT_THAT will try to print a char* as a C
// NUL-terminated C std::string. EXPECT_THAT will try to print a char* as a C
// std::string, possibly overruning the buffer.
EXPECT_THAT(reinterpret_cast<void*>(buf.data()), EqualsMemory(first));
@@ -1159,8 +1160,8 @@ TEST_F(MMapFileTest, WriteSharedTruncateUp) {
ASSERT_THAT(Read(buf.data(), buf.size()),
SyscallSucceedsWithValue(buf.size()));
// Cast to void* to avoid EXPECT_THAT assuming buf.data() is a
- // NUL-terminated C std::string. EXPECT_THAT will try to print a char* as a C
- // std::string, possibly overruning the buffer.
+ // NUL-terminated C string. EXPECT_THAT will try to print a char* as a C
+ // string, possibly overruning the buffer.
EXPECT_THAT(reinterpret_cast<void*>(buf.data()), EqualsMemory(first));
EXPECT_THAT(reinterpret_cast<void*>(buf.data() + kPageSize / 2),
EqualsMemory(second));
@@ -1234,8 +1235,8 @@ TEST_F(MMapFileTest, WriteSharedTruncateDownThenUp) {
ASSERT_THAT(Read(buf.data(), buf.size()),
SyscallSucceedsWithValue(buf.size()));
// Cast to void* to avoid EXPECT_THAT assuming buf.data() is a
- // NUL-terminated C std::string. EXPECT_THAT will try to print a char* as a C
- // std::string, possibly overruning the buffer.
+ // NUL-terminated C string. EXPECT_THAT will try to print a char* as a C
+ // string, possibly overruning the buffer.
EXPECT_THAT(reinterpret_cast<void*>(buf.data()), EqualsMemory(zeroed));
}
@@ -1363,8 +1364,8 @@ TEST_F(MMapFileTest, WritePrivate) {
ASSERT_THAT(Read(buf.data(), buf.size()),
SyscallSucceedsWithValue(buf.size()));
// Cast to void* to avoid EXPECT_THAT assuming buf.data() is a
- // NUL-terminated C std::string. EXPECT_THAT will try to print a char* as a C
- // std::string, possibly overruning the buffer.
+ // NUL-terminated C string. EXPECT_THAT will try to print a char* as a C
+ // string, possibly overruning the buffer.
EXPECT_THAT(reinterpret_cast<void*>(buf.data()),
EqualsMemory(std::string(len, '\0')));
}
diff --git a/test/syscalls/linux/mount.cc b/test/syscalls/linux/mount.cc
index 3a17672aa..e35be3cab 100644
--- a/test/syscalls/linux/mount.cc
+++ b/test/syscalls/linux/mount.cc
@@ -190,6 +190,14 @@ TEST(MountTest, MountTmpfs) {
SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+ // NOTE(b/129868551): Inode IDs are only stable across S/R if we have an open
+ // FD for that inode. Since we are going to compare inode IDs below, get a
+ // FileDescriptor for this directory here, which will be closed automatically
+ // at the end of the test.
+ auto const fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_DIRECTORY, O_RDONLY));
+
const struct stat before = ASSERT_NO_ERRNO_AND_VALUE(Stat(dir.path()));
{
diff --git a/test/syscalls/linux/mremap.cc b/test/syscalls/linux/mremap.cc
index 7298d4ca8..64e435cb7 100644
--- a/test/syscalls/linux/mremap.cc
+++ b/test/syscalls/linux/mremap.cc
@@ -46,17 +46,6 @@ PosixErrorOr<void*> Mremap(void* old_address, size_t old_size, size_t new_size,
return rv;
}
-// Returns true if the page containing addr is mapped.
-bool IsMapped(uintptr_t addr) {
- int const rv = msync(reinterpret_cast<void*>(addr & ~(kPageSize - 1)),
- kPageSize, MS_ASYNC);
- if (rv == 0) {
- return true;
- }
- TEST_PCHECK_MSG(errno == ENOMEM, "msync failed with unexpected errno");
- return false;
-}
-
// Fixture for mremap tests parameterized by mmap flags.
using MremapParamTest = ::testing::TestWithParam<int>;
diff --git a/test/syscalls/linux/open.cc b/test/syscalls/linux/open.cc
index 42646bb02..e0525f386 100644
--- a/test/syscalls/linux/open.cc
+++ b/test/syscalls/linux/open.cc
@@ -28,6 +28,7 @@
#include "test/util/fs_util.h"
#include "test/util/temp_path.h"
#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
namespace gvisor {
namespace testing {
@@ -214,6 +215,42 @@ TEST_F(OpenTest, AppendOnly) {
SyscallSucceedsWithValue(kBufSize * 3));
}
+TEST_F(OpenTest, AppendConcurrentWrite) {
+ constexpr int kThreadCount = 5;
+ constexpr int kBytesPerThread = 10000;
+ std::unique_ptr<ScopedThread> threads[kThreadCount];
+
+ // In case of the uncached policy, we expect that a file system can be changed
+ // externally, so we create a new inode each time when we open a file and we
+ // can't guarantee that writes to files with O_APPEND will work correctly.
+ SKIP_IF(getenv("GVISOR_GOFER_UNCACHED"));
+
+ EXPECT_THAT(truncate(test_file_name_.c_str(), 0), SyscallSucceeds());
+
+ std::string filename = test_file_name_;
+ DisableSave ds; // Too many syscalls.
+ // Start kThreadCount threads which will write concurrently into the same
+ // file.
+ for (int i = 0; i < kThreadCount; i++) {
+ threads[i] = absl::make_unique<ScopedThread>([filename]() {
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(filename, O_RDWR | O_APPEND));
+
+ for (int j = 0; j < kBytesPerThread; j++) {
+ EXPECT_THAT(WriteFd(fd.get(), &j, 1), SyscallSucceedsWithValue(1));
+ }
+ });
+ }
+ for (int i = 0; i < kThreadCount; i++) {
+ threads[i]->Join();
+ }
+
+ // Check that the size of the file is correct.
+ struct stat st;
+ EXPECT_THAT(stat(test_file_name_.c_str(), &st), SyscallSucceeds());
+ EXPECT_EQ(st.st_size, kThreadCount * kBytesPerThread);
+}
+
TEST_F(OpenTest, Truncate) {
{
// First write some data to the new file and close it.
diff --git a/test/syscalls/linux/pipe.cc b/test/syscalls/linux/pipe.cc
index 67b93ecf5..65afb90f3 100644
--- a/test/syscalls/linux/pipe.cc
+++ b/test/syscalls/linux/pipe.cc
@@ -50,32 +50,28 @@ struct PipeCreator {
};
class PipeTest : public ::testing::TestWithParam<PipeCreator> {
- protected:
- FileDescriptor rfd;
- FileDescriptor wfd;
-
public:
static void SetUpTestSuite() {
// Tests intentionally generate SIGPIPE.
TEST_PCHECK(signal(SIGPIPE, SIG_IGN) != SIG_ERR);
}
- // Initializes rfd and wfd as a blocking pipe.
+ // Initializes rfd_ and wfd_ as a blocking pipe.
//
// The return value indicates success: the test should be skipped otherwise.
bool CreateBlocking() { return create(true); }
- // Initializes rfd and wfd as a non-blocking pipe.
+ // Initializes rfd_ and wfd_ as a non-blocking pipe.
//
// The return value is per CreateBlocking.
bool CreateNonBlocking() { return create(false); }
// Returns true iff the pipe represents a named pipe.
- bool IsNamedPipe() { return namedpipe_; }
+ bool IsNamedPipe() const { return named_pipe_; }
- int Size() {
- int s1 = fcntl(rfd.get(), F_GETPIPE_SZ);
- int s2 = fcntl(wfd.get(), F_GETPIPE_SZ);
+ int Size() const {
+ int s1 = fcntl(rfd_.get(), F_GETPIPE_SZ);
+ int s2 = fcntl(wfd_.get(), F_GETPIPE_SZ);
EXPECT_GT(s1, 0);
EXPECT_GT(s2, 0);
EXPECT_EQ(s1, s2);
@@ -87,20 +83,18 @@ class PipeTest : public ::testing::TestWithParam<PipeCreator> {
}
private:
- bool namedpipe_ = false;
-
bool create(bool wants_blocking) {
// Generate the pipe.
int fds[2] = {-1, -1};
bool is_blocking = false;
- GetParam().create_(fds, &is_blocking, &namedpipe_);
+ GetParam().create_(fds, &is_blocking, &named_pipe_);
if (fds[0] < 0 || fds[1] < 0) {
return false;
}
// Save descriptors.
- rfd.reset(fds[0]);
- wfd.reset(fds[1]);
+ rfd_.reset(fds[0]);
+ wfd_.reset(fds[1]);
// Adjust blocking, if needed.
if (!is_blocking && wants_blocking) {
@@ -115,6 +109,13 @@ class PipeTest : public ::testing::TestWithParam<PipeCreator> {
return true;
}
+
+ protected:
+ FileDescriptor rfd_;
+ FileDescriptor wfd_;
+
+ private:
+ bool named_pipe_ = false;
};
TEST_P(PipeTest, Inode) {
@@ -122,9 +123,9 @@ TEST_P(PipeTest, Inode) {
// Ensure that the inode number is the same for each end.
struct stat rst;
- ASSERT_THAT(fstat(rfd.get(), &rst), SyscallSucceeds());
+ ASSERT_THAT(fstat(rfd_.get(), &rst), SyscallSucceeds());
struct stat wst;
- ASSERT_THAT(fstat(wfd.get(), &wst), SyscallSucceeds());
+ ASSERT_THAT(fstat(wfd_.get(), &wst), SyscallSucceeds());
EXPECT_EQ(rst.st_ino, wst.st_ino);
}
@@ -133,9 +134,10 @@ TEST_P(PipeTest, Permissions) {
// Attempt bad operations.
int buf = kTestValue;
- ASSERT_THAT(write(rfd.get(), &buf, sizeof(buf)),
+ ASSERT_THAT(write(rfd_.get(), &buf, sizeof(buf)),
+ SyscallFailsWithErrno(EBADF));
+ EXPECT_THAT(read(wfd_.get(), &buf, sizeof(buf)),
SyscallFailsWithErrno(EBADF));
- EXPECT_THAT(read(wfd.get(), &buf, sizeof(buf)), SyscallFailsWithErrno(EBADF));
}
TEST_P(PipeTest, Flags) {
@@ -144,13 +146,13 @@ TEST_P(PipeTest, Flags) {
if (IsNamedPipe()) {
// May be stubbed to zero; define locally.
constexpr int kLargefile = 0100000;
- EXPECT_THAT(fcntl(rfd.get(), F_GETFL),
+ EXPECT_THAT(fcntl(rfd_.get(), F_GETFL),
SyscallSucceedsWithValue(kLargefile | O_RDONLY));
- EXPECT_THAT(fcntl(wfd.get(), F_GETFL),
+ EXPECT_THAT(fcntl(wfd_.get(), F_GETFL),
SyscallSucceedsWithValue(kLargefile | O_WRONLY));
} else {
- EXPECT_THAT(fcntl(rfd.get(), F_GETFL), SyscallSucceedsWithValue(O_RDONLY));
- EXPECT_THAT(fcntl(wfd.get(), F_GETFL), SyscallSucceedsWithValue(O_WRONLY));
+ EXPECT_THAT(fcntl(rfd_.get(), F_GETFL), SyscallSucceedsWithValue(O_RDONLY));
+ EXPECT_THAT(fcntl(wfd_.get(), F_GETFL), SyscallSucceedsWithValue(O_WRONLY));
}
}
@@ -159,9 +161,9 @@ TEST_P(PipeTest, Write) {
int wbuf = kTestValue;
int rbuf = ~kTestValue;
- ASSERT_THAT(write(wfd.get(), &wbuf, sizeof(wbuf)),
+ ASSERT_THAT(write(wfd_.get(), &wbuf, sizeof(wbuf)),
SyscallSucceedsWithValue(sizeof(wbuf)));
- ASSERT_THAT(read(rfd.get(), &rbuf, sizeof(rbuf)),
+ ASSERT_THAT(read(rfd_.get(), &rbuf, sizeof(rbuf)),
SyscallSucceedsWithValue(sizeof(rbuf)));
EXPECT_EQ(wbuf, rbuf);
}
@@ -171,15 +173,15 @@ TEST_P(PipeTest, NonBlocking) {
int wbuf = kTestValue;
int rbuf = ~kTestValue;
- EXPECT_THAT(read(rfd.get(), &rbuf, sizeof(rbuf)),
+ EXPECT_THAT(read(rfd_.get(), &rbuf, sizeof(rbuf)),
SyscallFailsWithErrno(EWOULDBLOCK));
- ASSERT_THAT(write(wfd.get(), &wbuf, sizeof(wbuf)),
+ ASSERT_THAT(write(wfd_.get(), &wbuf, sizeof(wbuf)),
SyscallSucceedsWithValue(sizeof(wbuf)));
- ASSERT_THAT(read(rfd.get(), &rbuf, sizeof(rbuf)),
+ ASSERT_THAT(read(rfd_.get(), &rbuf, sizeof(rbuf)),
SyscallSucceedsWithValue(sizeof(rbuf)));
EXPECT_EQ(wbuf, rbuf);
- EXPECT_THAT(read(rfd.get(), &rbuf, sizeof(rbuf)),
+ EXPECT_THAT(read(rfd_.get(), &rbuf, sizeof(rbuf)),
SyscallFailsWithErrno(EWOULDBLOCK));
}
@@ -202,26 +204,26 @@ TEST_P(PipeTest, Seek) {
for (int i = 0; i < 4; i++) {
// Attempt absolute seeks.
- EXPECT_THAT(lseek(rfd.get(), 0, SEEK_SET), SyscallFailsWithErrno(ESPIPE));
- EXPECT_THAT(lseek(rfd.get(), 4, SEEK_SET), SyscallFailsWithErrno(ESPIPE));
- EXPECT_THAT(lseek(wfd.get(), 0, SEEK_SET), SyscallFailsWithErrno(ESPIPE));
- EXPECT_THAT(lseek(wfd.get(), 4, SEEK_SET), SyscallFailsWithErrno(ESPIPE));
+ EXPECT_THAT(lseek(rfd_.get(), 0, SEEK_SET), SyscallFailsWithErrno(ESPIPE));
+ EXPECT_THAT(lseek(rfd_.get(), 4, SEEK_SET), SyscallFailsWithErrno(ESPIPE));
+ EXPECT_THAT(lseek(wfd_.get(), 0, SEEK_SET), SyscallFailsWithErrno(ESPIPE));
+ EXPECT_THAT(lseek(wfd_.get(), 4, SEEK_SET), SyscallFailsWithErrno(ESPIPE));
// Attempt relative seeks.
- EXPECT_THAT(lseek(rfd.get(), 0, SEEK_CUR), SyscallFailsWithErrno(ESPIPE));
- EXPECT_THAT(lseek(rfd.get(), 4, SEEK_CUR), SyscallFailsWithErrno(ESPIPE));
- EXPECT_THAT(lseek(wfd.get(), 0, SEEK_CUR), SyscallFailsWithErrno(ESPIPE));
- EXPECT_THAT(lseek(wfd.get(), 4, SEEK_CUR), SyscallFailsWithErrno(ESPIPE));
+ EXPECT_THAT(lseek(rfd_.get(), 0, SEEK_CUR), SyscallFailsWithErrno(ESPIPE));
+ EXPECT_THAT(lseek(rfd_.get(), 4, SEEK_CUR), SyscallFailsWithErrno(ESPIPE));
+ EXPECT_THAT(lseek(wfd_.get(), 0, SEEK_CUR), SyscallFailsWithErrno(ESPIPE));
+ EXPECT_THAT(lseek(wfd_.get(), 4, SEEK_CUR), SyscallFailsWithErrno(ESPIPE));
// Attempt end-of-file seeks.
- EXPECT_THAT(lseek(rfd.get(), 0, SEEK_CUR), SyscallFailsWithErrno(ESPIPE));
- EXPECT_THAT(lseek(rfd.get(), -4, SEEK_END), SyscallFailsWithErrno(ESPIPE));
- EXPECT_THAT(lseek(wfd.get(), 0, SEEK_CUR), SyscallFailsWithErrno(ESPIPE));
- EXPECT_THAT(lseek(wfd.get(), -4, SEEK_END), SyscallFailsWithErrno(ESPIPE));
+ EXPECT_THAT(lseek(rfd_.get(), 0, SEEK_CUR), SyscallFailsWithErrno(ESPIPE));
+ EXPECT_THAT(lseek(rfd_.get(), -4, SEEK_END), SyscallFailsWithErrno(ESPIPE));
+ EXPECT_THAT(lseek(wfd_.get(), 0, SEEK_CUR), SyscallFailsWithErrno(ESPIPE));
+ EXPECT_THAT(lseek(wfd_.get(), -4, SEEK_END), SyscallFailsWithErrno(ESPIPE));
// Add some more data to the pipe.
int buf = kTestValue;
- ASSERT_THAT(write(wfd.get(), &buf, sizeof(buf)),
+ ASSERT_THAT(write(wfd_.get(), &buf, sizeof(buf)),
SyscallSucceedsWithValue(sizeof(buf)));
}
}
@@ -230,14 +232,14 @@ TEST_P(PipeTest, OffsetCalls) {
SKIP_IF(!CreateBlocking());
int buf;
- EXPECT_THAT(pread(wfd.get(), &buf, sizeof(buf), 0),
+ EXPECT_THAT(pread(wfd_.get(), &buf, sizeof(buf), 0),
SyscallFailsWithErrno(ESPIPE));
- EXPECT_THAT(pwrite(rfd.get(), &buf, sizeof(buf), 0),
+ EXPECT_THAT(pwrite(rfd_.get(), &buf, sizeof(buf), 0),
SyscallFailsWithErrno(ESPIPE));
struct iovec iov;
- EXPECT_THAT(preadv(wfd.get(), &iov, 1, 0), SyscallFailsWithErrno(ESPIPE));
- EXPECT_THAT(pwritev(rfd.get(), &iov, 1, 0), SyscallFailsWithErrno(ESPIPE));
+ EXPECT_THAT(preadv(wfd_.get(), &iov, 1, 0), SyscallFailsWithErrno(ESPIPE));
+ EXPECT_THAT(pwritev(rfd_.get(), &iov, 1, 0), SyscallFailsWithErrno(ESPIPE));
}
TEST_P(PipeTest, WriterSideCloses) {
@@ -245,13 +247,13 @@ TEST_P(PipeTest, WriterSideCloses) {
ScopedThread t([this]() {
int buf = ~kTestValue;
- ASSERT_THAT(read(rfd.get(), &buf, sizeof(buf)),
+ ASSERT_THAT(read(rfd_.get(), &buf, sizeof(buf)),
SyscallSucceedsWithValue(sizeof(buf)));
EXPECT_EQ(buf, kTestValue);
// This will return when the close() completes.
- ASSERT_THAT(read(rfd.get(), &buf, sizeof(buf)), SyscallSucceeds());
+ ASSERT_THAT(read(rfd_.get(), &buf, sizeof(buf)), SyscallSucceeds());
// This will return straight away.
- ASSERT_THAT(read(rfd.get(), &buf, sizeof(buf)),
+ ASSERT_THAT(read(rfd_.get(), &buf, sizeof(buf)),
SyscallSucceedsWithValue(0));
});
@@ -260,14 +262,14 @@ TEST_P(PipeTest, WriterSideCloses) {
// Write to unblock.
int buf = kTestValue;
- ASSERT_THAT(write(wfd.get(), &buf, sizeof(buf)),
+ ASSERT_THAT(write(wfd_.get(), &buf, sizeof(buf)),
SyscallSucceedsWithValue(sizeof(buf)));
// Sleep a bit so the thread can block again.
absl::SleepFor(syncDelay);
// Allow the thread to complete.
- ASSERT_THAT(close(wfd.release()), SyscallSucceeds());
+ ASSERT_THAT(close(wfd_.release()), SyscallSucceeds());
t.Join();
}
@@ -275,36 +277,36 @@ TEST_P(PipeTest, WriterSideClosesReadDataFirst) {
SKIP_IF(!CreateBlocking());
int wbuf = kTestValue;
- ASSERT_THAT(write(wfd.get(), &wbuf, sizeof(wbuf)),
+ ASSERT_THAT(write(wfd_.get(), &wbuf, sizeof(wbuf)),
SyscallSucceedsWithValue(sizeof(wbuf)));
- ASSERT_THAT(close(wfd.release()), SyscallSucceeds());
+ ASSERT_THAT(close(wfd_.release()), SyscallSucceeds());
int rbuf;
- ASSERT_THAT(read(rfd.get(), &rbuf, sizeof(rbuf)),
+ ASSERT_THAT(read(rfd_.get(), &rbuf, sizeof(rbuf)),
SyscallSucceedsWithValue(sizeof(rbuf)));
EXPECT_EQ(wbuf, rbuf);
- EXPECT_THAT(read(rfd.get(), &rbuf, sizeof(rbuf)),
+ EXPECT_THAT(read(rfd_.get(), &rbuf, sizeof(rbuf)),
SyscallSucceedsWithValue(0));
}
TEST_P(PipeTest, ReaderSideCloses) {
SKIP_IF(!CreateBlocking());
- ASSERT_THAT(close(rfd.release()), SyscallSucceeds());
+ ASSERT_THAT(close(rfd_.release()), SyscallSucceeds());
int buf = kTestValue;
- EXPECT_THAT(write(wfd.get(), &buf, sizeof(buf)),
+ EXPECT_THAT(write(wfd_.get(), &buf, sizeof(buf)),
SyscallFailsWithErrno(EPIPE));
}
TEST_P(PipeTest, CloseTwice) {
SKIP_IF(!CreateBlocking());
- int _rfd = rfd.release();
- int _wfd = wfd.release();
- ASSERT_THAT(close(_rfd), SyscallSucceeds());
- ASSERT_THAT(close(_wfd), SyscallSucceeds());
- EXPECT_THAT(close(_rfd), SyscallFailsWithErrno(EBADF));
- EXPECT_THAT(close(_wfd), SyscallFailsWithErrno(EBADF));
+ int reader = rfd_.release();
+ int writer = wfd_.release();
+ ASSERT_THAT(close(reader), SyscallSucceeds());
+ ASSERT_THAT(close(writer), SyscallSucceeds());
+ EXPECT_THAT(close(reader), SyscallFailsWithErrno(EBADF));
+ EXPECT_THAT(close(writer), SyscallFailsWithErrno(EBADF));
}
// Blocking write returns EPIPE when read end is closed if nothing has been
@@ -316,18 +318,18 @@ TEST_P(PipeTest, BlockWriteClosed) {
ScopedThread t([this, &notify]() {
std::vector<char> buf(Size());
// Exactly fill the pipe buffer.
- ASSERT_THAT(WriteFd(wfd.get(), buf.data(), buf.size()),
+ ASSERT_THAT(WriteFd(wfd_.get(), buf.data(), buf.size()),
SyscallSucceedsWithValue(buf.size()));
notify.Notify();
// Attempt to write one more byte. Blocks.
// N.B. Don't use WriteFd, we don't want a retry.
- EXPECT_THAT(write(wfd.get(), buf.data(), 1), SyscallFailsWithErrno(EPIPE));
+ EXPECT_THAT(write(wfd_.get(), buf.data(), 1), SyscallFailsWithErrno(EPIPE));
});
notify.WaitForNotification();
- ASSERT_THAT(close(rfd.release()), SyscallSucceeds());
+ ASSERT_THAT(close(rfd_.release()), SyscallSucceeds());
t.Join();
}
@@ -337,12 +339,14 @@ TEST_P(PipeTest, BlockPartialWriteClosed) {
SKIP_IF(!CreateBlocking());
ScopedThread t([this]() {
- std::vector<char> buf(2 * Size());
+ const int pipe_size = Size();
+ std::vector<char> buf(2 * pipe_size);
+
// Write more than fits in the buffer. Blocks then returns partial write
// when the other end is closed. The next call returns EPIPE.
- ASSERT_THAT(write(wfd.get(), buf.data(), buf.size()),
- SyscallSucceedsWithValue(Size()));
- EXPECT_THAT(write(wfd.get(), buf.data(), buf.size()),
+ ASSERT_THAT(write(wfd_.get(), buf.data(), buf.size()),
+ SyscallSucceedsWithValue(pipe_size));
+ EXPECT_THAT(write(wfd_.get(), buf.data(), buf.size()),
SyscallFailsWithErrno(EPIPE));
});
@@ -350,7 +354,7 @@ TEST_P(PipeTest, BlockPartialWriteClosed) {
absl::SleepFor(syncDelay);
// Unblock the above.
- ASSERT_THAT(close(rfd.release()), SyscallSucceeds());
+ ASSERT_THAT(close(rfd_.release()), SyscallSucceeds());
t.Join();
}
@@ -361,7 +365,7 @@ TEST_P(PipeTest, ReadFromClosedFd_NoRandomSave) {
ScopedThread t([this, &notify]() {
notify.Notify();
int buf;
- ASSERT_THAT(read(rfd.get(), &buf, sizeof(buf)),
+ ASSERT_THAT(read(rfd_.get(), &buf, sizeof(buf)),
SyscallSucceedsWithValue(sizeof(buf)));
ASSERT_EQ(kTestValue, buf);
});
@@ -375,9 +379,9 @@ TEST_P(PipeTest, ReadFromClosedFd_NoRandomSave) {
// is ongoing read() above. We will not be able to restart the read()
// successfully in restore run since the read fd is closed.
const DisableSave ds;
- ASSERT_THAT(close(rfd.release()), SyscallSucceeds());
+ ASSERT_THAT(close(rfd_.release()), SyscallSucceeds());
int buf = kTestValue;
- ASSERT_THAT(write(wfd.get(), &buf, sizeof(buf)),
+ ASSERT_THAT(write(wfd_.get(), &buf, sizeof(buf)),
SyscallSucceedsWithValue(sizeof(buf)));
t.Join();
}
@@ -387,18 +391,18 @@ TEST_P(PipeTest, FionRead) {
SKIP_IF(!CreateBlocking());
int n;
- ASSERT_THAT(ioctl(rfd.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
+ ASSERT_THAT(ioctl(rfd_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
EXPECT_EQ(n, 0);
- ASSERT_THAT(ioctl(wfd.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
+ ASSERT_THAT(ioctl(wfd_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
EXPECT_EQ(n, 0);
std::vector<char> buf(Size());
- ASSERT_THAT(write(wfd.get(), buf.data(), buf.size()),
+ ASSERT_THAT(write(wfd_.get(), buf.data(), buf.size()),
SyscallSucceedsWithValue(buf.size()));
- EXPECT_THAT(ioctl(rfd.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
+ EXPECT_THAT(ioctl(rfd_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
EXPECT_EQ(n, buf.size());
- EXPECT_THAT(ioctl(wfd.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
+ EXPECT_THAT(ioctl(wfd_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0));
EXPECT_EQ(n, buf.size());
}
@@ -409,11 +413,11 @@ TEST_P(PipeTest, OpenViaProcSelfFD) {
SKIP_IF(IsNamedPipe());
// Close the write end of the pipe.
- ASSERT_THAT(close(wfd.release()), SyscallSucceeds());
+ ASSERT_THAT(close(wfd_.release()), SyscallSucceeds());
// Open other side via /proc/self/fd. It should not block.
FileDescriptor proc_self_fd = ASSERT_NO_ERRNO_AND_VALUE(
- Open(absl::StrCat("/proc/self/fd/", rfd.get()), O_RDONLY));
+ Open(absl::StrCat("/proc/self/fd/", rfd_.get()), O_RDONLY));
}
// Test that opening and reading from an anonymous pipe (with existing writes)
@@ -424,13 +428,13 @@ TEST_P(PipeTest, OpenViaProcSelfFDWithWrites) {
// Write to the pipe and then close the write fd.
int wbuf = kTestValue;
- ASSERT_THAT(write(wfd.get(), &wbuf, sizeof(wbuf)),
+ ASSERT_THAT(write(wfd_.get(), &wbuf, sizeof(wbuf)),
SyscallSucceedsWithValue(sizeof(wbuf)));
- ASSERT_THAT(close(wfd.release()), SyscallSucceeds());
+ ASSERT_THAT(close(wfd_.release()), SyscallSucceeds());
// Open read side via /proc/self/fd, and read from it.
FileDescriptor proc_self_fd = ASSERT_NO_ERRNO_AND_VALUE(
- Open(absl::StrCat("/proc/self/fd/", rfd.get()), O_RDONLY));
+ Open(absl::StrCat("/proc/self/fd/", rfd_.get()), O_RDONLY));
int rbuf;
ASSERT_THAT(read(proc_self_fd.get(), &rbuf, sizeof(rbuf)),
SyscallSucceedsWithValue(sizeof(rbuf)));
@@ -443,13 +447,13 @@ TEST_P(PipeTest, ProcFDReleasesFile) {
// Stat the pipe FD, which shouldn't alter the refcount.
struct stat wst;
- ASSERT_THAT(lstat(absl::StrCat("/proc/self/fd/", wfd.get()).c_str(), &wst),
+ ASSERT_THAT(lstat(absl::StrCat("/proc/self/fd/", wfd_.get()).c_str(), &wst),
SyscallSucceeds());
// Close the write end and ensure that read indicates EOF.
- wfd.reset();
+ wfd_.reset();
char buf;
- ASSERT_THAT(read(rfd.get(), &buf, 1), SyscallSucceedsWithValue(0));
+ ASSERT_THAT(read(rfd_.get(), &buf, 1), SyscallSucceedsWithValue(0));
}
// Same for /proc/<PID>/fdinfo.
@@ -459,30 +463,30 @@ TEST_P(PipeTest, ProcFDInfoReleasesFile) {
// Stat the pipe FD, which shouldn't alter the refcount.
struct stat wst;
ASSERT_THAT(
- lstat(absl::StrCat("/proc/self/fdinfo/", wfd.get()).c_str(), &wst),
+ lstat(absl::StrCat("/proc/self/fdinfo/", wfd_.get()).c_str(), &wst),
SyscallSucceeds());
// Close the write end and ensure that read indicates EOF.
- wfd.reset();
+ wfd_.reset();
char buf;
- ASSERT_THAT(read(rfd.get(), &buf, 1), SyscallSucceedsWithValue(0));
+ ASSERT_THAT(read(rfd_.get(), &buf, 1), SyscallSucceedsWithValue(0));
}
TEST_P(PipeTest, SizeChange) {
SKIP_IF(!CreateBlocking());
// Set the minimum possible size.
- ASSERT_THAT(fcntl(rfd.get(), F_SETPIPE_SZ, 0), SyscallSucceeds());
+ ASSERT_THAT(fcntl(rfd_.get(), F_SETPIPE_SZ, 0), SyscallSucceeds());
int min = Size();
EXPECT_GT(min, 0); // Should be rounded up.
// Set from the read end.
- ASSERT_THAT(fcntl(rfd.get(), F_SETPIPE_SZ, min + 1), SyscallSucceeds());
+ ASSERT_THAT(fcntl(rfd_.get(), F_SETPIPE_SZ, min + 1), SyscallSucceeds());
int med = Size();
EXPECT_GT(med, min); // Should have grown, may be rounded.
// Set from the write end.
- ASSERT_THAT(fcntl(wfd.get(), F_SETPIPE_SZ, med + 1), SyscallSucceeds());
+ ASSERT_THAT(fcntl(wfd_.get(), F_SETPIPE_SZ, med + 1), SyscallSucceeds());
int max = Size();
EXPECT_GT(max, med); // Ditto.
}
@@ -491,9 +495,9 @@ TEST_P(PipeTest, SizeChangeMax) {
SKIP_IF(!CreateBlocking());
// Assert there's some maximum.
- EXPECT_THAT(fcntl(rfd.get(), F_SETPIPE_SZ, 0x7fffffffffffffff),
+ EXPECT_THAT(fcntl(rfd_.get(), F_SETPIPE_SZ, 0x7fffffffffffffff),
SyscallFailsWithErrno(EINVAL));
- EXPECT_THAT(fcntl(wfd.get(), F_SETPIPE_SZ, 0x7fffffffffffffff),
+ EXPECT_THAT(fcntl(wfd_.get(), F_SETPIPE_SZ, 0x7fffffffffffffff),
SyscallFailsWithErrno(EINVAL));
}
@@ -505,14 +509,14 @@ TEST_P(PipeTest, SizeChangeFull) {
// adjust the size and the call below will return success. It was found via
// experimentation that this granularity avoids the rounding for Linux.
constexpr int kDelta = 64 * 1024;
- ASSERT_THAT(fcntl(wfd.get(), F_SETPIPE_SZ, Size() + kDelta),
+ ASSERT_THAT(fcntl(wfd_.get(), F_SETPIPE_SZ, Size() + kDelta),
SyscallSucceeds());
// Fill the buffer and try to change down.
std::vector<char> buf(Size());
- ASSERT_THAT(write(wfd.get(), buf.data(), buf.size()),
+ ASSERT_THAT(write(wfd_.get(), buf.data(), buf.size()),
SyscallSucceedsWithValue(buf.size()));
- EXPECT_THAT(fcntl(wfd.get(), F_SETPIPE_SZ, Size() - kDelta),
+ EXPECT_THAT(fcntl(wfd_.get(), F_SETPIPE_SZ, Size() - kDelta),
SyscallFailsWithErrno(EBUSY));
}
@@ -522,23 +526,32 @@ TEST_P(PipeTest, Streaming) {
// We make too many calls to go through full save cycles.
DisableSave ds;
+ // Size() requires 2 syscalls, call it once and remember the value.
+ const int pipe_size = Size();
+
absl::Notification notify;
- ScopedThread t([this, &notify]() {
+ ScopedThread t([this, &notify, pipe_size]() {
// Don't start until it's full.
notify.WaitForNotification();
- for (int i = 0; i < 2 * Size(); i++) {
+ for (int i = 0; i < pipe_size; i++) {
int rbuf;
- ASSERT_THAT(read(rfd.get(), &rbuf, sizeof(rbuf)),
+ ASSERT_THAT(read(rfd_.get(), &rbuf, sizeof(rbuf)),
SyscallSucceedsWithValue(sizeof(rbuf)));
EXPECT_EQ(rbuf, i);
}
});
- for (int i = 0; i < 2 * Size(); i++) {
- int wbuf = i;
- ASSERT_THAT(write(wfd.get(), &wbuf, sizeof(wbuf)),
- SyscallSucceedsWithValue(sizeof(wbuf)));
- // Did that write just fill up the buffer? Wake up the reader. Once only.
- if ((i * sizeof(wbuf)) < Size() && ((i + 1) * sizeof(wbuf)) >= Size()) {
+
+ // Write 4 bytes * pipe_size. It will fill up the pipe once, notify the reader
+ // to start. Then we write pipe size worth 3 more times to ensure the reader
+ // can follow along.
+ ssize_t total = 0;
+ for (int i = 0; i < pipe_size; i++) {
+ ssize_t written = write(wfd_.get(), &i, sizeof(i));
+ ASSERT_THAT(written, SyscallSucceedsWithValue(sizeof(i)));
+ total += written;
+
+ // Is the next write about to fill up the buffer? Wake up the reader once.
+ if (total < pipe_size && (total + written) >= pipe_size) {
notify.Notify();
}
}
diff --git a/test/syscalls/linux/proc.cc b/test/syscalls/linux/proc.cc
index 924b98e3a..b440ba0df 100644
--- a/test/syscalls/linux/proc.cc
+++ b/test/syscalls/linux/proc.cc
@@ -206,8 +206,8 @@ PosixError WithSubprocess(SubprocessCallback const& running,
}
// Access the file returned by name when a subprocess is running.
-PosixError AccessWhileRunning(std::function<std::string(int pid)> name, int flags,
- std::function<void(int fd)> access) {
+PosixError AccessWhileRunning(std::function<std::string(int pid)> name,
+ int flags, std::function<void(int fd)> access) {
FileDescriptor fd;
return WithSubprocess(
[&](int pid) -> PosixError {
@@ -221,8 +221,8 @@ PosixError AccessWhileRunning(std::function<std::string(int pid)> name, int flag
}
// Access the file returned by name when the a subprocess is zombied.
-PosixError AccessWhileZombied(std::function<std::string(int pid)> name, int flags,
- std::function<void(int fd)> access) {
+PosixError AccessWhileZombied(std::function<std::string(int pid)> name,
+ int flags, std::function<void(int fd)> access) {
FileDescriptor fd;
return WithSubprocess(
[&](int pid) -> PosixError {
@@ -239,8 +239,8 @@ PosixError AccessWhileZombied(std::function<std::string(int pid)> name, int flag
}
// Access the file returned by name when the a subprocess is exited.
-PosixError AccessWhileExited(std::function<std::string(int pid)> name, int flags,
- std::function<void(int fd)> access) {
+PosixError AccessWhileExited(std::function<std::string(int pid)> name,
+ int flags, std::function<void(int fd)> access) {
FileDescriptor fd;
return WithSubprocess(
[&](int pid) -> PosixError {
@@ -704,7 +704,8 @@ TEST(ProcSelfExe, Absolute) {
// Sanity check for /proc/cpuinfo fields that must be present.
TEST(ProcCpuinfo, RequiredFieldsArePresent) {
- std::string proc_cpuinfo = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/cpuinfo"));
+ std::string proc_cpuinfo =
+ ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/cpuinfo"));
ASSERT_FALSE(proc_cpuinfo.empty());
std::vector<std::string> cpuinfo_fields = absl::StrSplit(proc_cpuinfo, '\n');
@@ -743,7 +744,8 @@ TEST(ProcCpuinfo, DeniesWrite) {
// Sanity checks that uptime is present.
TEST(ProcUptime, IsPresent) {
- std::string proc_uptime = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/uptime"));
+ std::string proc_uptime =
+ ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/uptime"));
ASSERT_FALSE(proc_uptime.empty());
std::vector<std::string> uptime_parts = absl::StrSplit(proc_uptime, ' ');
@@ -775,7 +777,8 @@ TEST(ProcUptime, IsPresent) {
}
TEST(ProcMeminfo, ContainsBasicFields) {
- std::string proc_meminfo = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/meminfo"));
+ std::string proc_meminfo =
+ ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/meminfo"));
EXPECT_THAT(proc_meminfo, AllOf(ContainsRegex(R"(MemTotal:\s+[0-9]+ kB)"),
ContainsRegex(R"(MemFree:\s+[0-9]+ kB)")));
}
@@ -853,12 +856,14 @@ TEST(ProcStat, Fields) {
}
TEST(ProcLoadavg, EndsWithNewline) {
- std::string proc_loadvg = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/loadavg"));
+ std::string proc_loadvg =
+ ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/loadavg"));
EXPECT_EQ(proc_loadvg.back(), '\n');
}
TEST(ProcLoadavg, Fields) {
- std::string proc_loadvg = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/loadavg"));
+ std::string proc_loadvg =
+ ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/loadavg"));
std::vector<std::string> lines = absl::StrSplit(proc_loadvg, '\n');
// Single line.
@@ -1238,10 +1243,12 @@ TEST(ProcPidStatTest, VmStats) {
EXPECT_NE('0', data_str[0]);
}
-// Parse an array of NUL-terminated char* arrays, returning a vector of strings.
+// Parse an array of NUL-terminated char* arrays, returning a vector of
+// strings.
std::vector<std::string> ParseNulTerminatedStrings(std::string contents) {
EXPECT_EQ('\0', contents.back());
- // The split will leave an empty std::string if the NUL-byte remains, so pop it.
+ // The split will leave an empty string if the NUL-byte remains, so pop
+ // it.
contents.pop_back();
return absl::StrSplit(contents, '\0');
@@ -1491,7 +1498,8 @@ TEST(ProcPidFile, SubprocessExited) {
}
PosixError DirContainsImpl(absl::string_view path,
- const std::vector<std::string>& targets, bool strict) {
+ const std::vector<std::string>& targets,
+ bool strict) {
ASSIGN_OR_RETURN_ERRNO(auto listing, ListDir(path, false));
bool success = true;
@@ -1530,8 +1538,8 @@ PosixError DirContainsExactly(absl::string_view path,
return DirContainsImpl(path, targets, true);
}
-PosixError EventuallyDirContainsExactly(absl::string_view path,
- const std::vector<std::string>& targets) {
+PosixError EventuallyDirContainsExactly(
+ absl::string_view path, const std::vector<std::string>& targets) {
constexpr int kRetryCount = 100;
const absl::Duration kRetryDelay = absl::Milliseconds(100);
@@ -1553,11 +1561,13 @@ TEST(ProcTask, Basic) {
DirContains("/proc/self/task", {".", "..", absl::StrCat(getpid())}));
}
-std::vector<std::string> TaskFiles(const std::vector<std::string>& initial_contents,
- const std::vector<pid_t>& pids) {
+std::vector<std::string> TaskFiles(
+ const std::vector<std::string>& initial_contents,
+ const std::vector<pid_t>& pids) {
return VecCat<std::string>(
initial_contents,
- ApplyVec<std::string>([](const pid_t p) { return absl::StrCat(p); }, pids));
+ ApplyVec<std::string>([](const pid_t p) { return absl::StrCat(p); },
+ pids));
}
std::vector<std::string> TaskFiles(const std::vector<pid_t>& pids) {
@@ -1894,7 +1904,8 @@ void CheckDuplicatesRecursively(std::string path) {
continue;
}
- ASSERT_EQ(children.find(std::string(dp->d_name)), children.end()) << dp->d_name;
+ ASSERT_EQ(children.find(std::string(dp->d_name)), children.end())
+ << dp->d_name;
children.insert(std::string(dp->d_name));
ASSERT_NE(dp->d_type, DT_UNKNOWN);
@@ -1953,6 +1964,22 @@ TEST(ProcPid, RootDumpableOwner) {
EXPECT_THAT(st.st_gid, AnyOf(Eq(0), Eq(65534)));
}
+TEST(Proc, GetdentsEnoent) {
+ FileDescriptor fd;
+ ASSERT_NO_ERRNO(WithSubprocess(
+ [&](int pid) -> PosixError {
+ // Running.
+ ASSIGN_OR_RETURN_ERRNO(fd, Open(absl::StrCat("/proc/", pid, "/task"),
+ O_RDONLY | O_DIRECTORY));
+
+ return NoError();
+ },
+ nullptr, nullptr));
+ char buf[1024];
+ ASSERT_THAT(syscall(SYS_getdents, fd.get(), buf, sizeof(buf)),
+ SyscallFailsWithErrno(ENOENT));
+}
+
} // namespace
} // namespace testing
} // namespace gvisor
diff --git a/test/syscalls/linux/proc_net_tcp.cc b/test/syscalls/linux/proc_net_tcp.cc
new file mode 100644
index 000000000..578b20680
--- /dev/null
+++ b/test/syscalls/linux/proc_net_tcp.cc
@@ -0,0 +1,281 @@
+// Copyright 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "gtest/gtest.h"
+#include "absl/strings/numbers.h"
+#include "absl/strings/str_join.h"
+#include "absl/strings/str_split.h"
+#include "test/syscalls/linux/ip_socket_test_util.h"
+#include "test/util/file_descriptor.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+using absl::StrCat;
+using absl::StrSplit;
+
+constexpr char kProcNetTCPHeader[] =
+ " sl local_address rem_address st tx_queue rx_queue tr tm->when "
+ "retrnsmt uid timeout inode "
+ " ";
+
+// Possible values of the "st" field in a /proc/net/tcp entry. Source: Linux
+// kernel, include/net/tcp_states.h.
+enum {
+ TCP_ESTABLISHED = 1,
+ TCP_SYN_SENT,
+ TCP_SYN_RECV,
+ TCP_FIN_WAIT1,
+ TCP_FIN_WAIT2,
+ TCP_TIME_WAIT,
+ TCP_CLOSE,
+ TCP_CLOSE_WAIT,
+ TCP_LAST_ACK,
+ TCP_LISTEN,
+ TCP_CLOSING,
+ TCP_NEW_SYN_RECV,
+
+ TCP_MAX_STATES
+};
+
+// TCPEntry represents a single entry from /proc/net/tcp.
+struct TCPEntry {
+ uint32_t local_addr;
+ uint16_t local_port;
+
+ uint32_t remote_addr;
+ uint16_t remote_port;
+
+ uint64_t state;
+ uint64_t uid;
+ uint64_t inode;
+};
+
+uint32_t IP(const struct sockaddr* addr) {
+ auto* in_addr = reinterpret_cast<const struct sockaddr_in*>(addr);
+ return in_addr->sin_addr.s_addr;
+}
+
+uint16_t Port(const struct sockaddr* addr) {
+ auto* in_addr = reinterpret_cast<const struct sockaddr_in*>(addr);
+ return ntohs(in_addr->sin_port);
+}
+
+// Finds the first entry in 'entries' for which 'predicate' returns true.
+// Returns true on match, and sets 'match' to point to the matching entry.
+bool FindBy(std::vector<TCPEntry> entries, TCPEntry* match,
+ std::function<bool(const TCPEntry&)> predicate) {
+ for (int i = 0; i < entries.size(); ++i) {
+ if (predicate(entries[i])) {
+ *match = entries[i];
+ return true;
+ }
+ }
+ return false;
+}
+
+bool FindByLocalAddr(std::vector<TCPEntry> entries, TCPEntry* match,
+ const struct sockaddr* addr) {
+ uint32_t host = IP(addr);
+ uint16_t port = Port(addr);
+ return FindBy(entries, match, [host, port](const TCPEntry& e) {
+ return (e.local_addr == host && e.local_port == port);
+ });
+}
+
+bool FindByRemoteAddr(std::vector<TCPEntry> entries, TCPEntry* match,
+ const struct sockaddr* addr) {
+ uint32_t host = IP(addr);
+ uint16_t port = Port(addr);
+ return FindBy(entries, match, [host, port](const TCPEntry& e) {
+ return (e.remote_addr == host && e.remote_port == port);
+ });
+}
+
+// Returns a parsed representation of /proc/net/tcp entries.
+PosixErrorOr<std::vector<TCPEntry>> ProcNetTCPEntries() {
+ std::string content;
+ RETURN_IF_ERRNO(GetContents("/proc/net/tcp", &content));
+
+ bool found_header = false;
+ std::vector<TCPEntry> entries;
+ std::vector<std::string> lines = StrSplit(content, '\n');
+ std::cerr << "<contents of /proc/net/tcp>" << std::endl;
+ for (std::string line : lines) {
+ std::cerr << line << std::endl;
+
+ if (!found_header) {
+ EXPECT_EQ(line, kProcNetTCPHeader);
+ found_header = true;
+ continue;
+ }
+ if (line.empty()) {
+ continue;
+ }
+
+ // Parse a single entry from /proc/net/tcp.
+ //
+ // Example entries:
+ //
+ // clang-format off
+ //
+ // sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode
+ // 0: 00000000:006F 00000000:0000 0A 00000000:00000000 00:00000000 00000000 0 0 1968 1 0000000000000000 100 0 0 10 0
+ // 1: 0100007F:7533 00000000:0000 0A 00000000:00000000 00:00000000 00000000 120 0 10684 1 0000000000000000 100 0 0 10 0
+ // ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^
+ // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
+ //
+ // clang-format on
+
+ TCPEntry entry;
+ std::vector<std::string> fields =
+ StrSplit(line, absl::ByAnyChar(": "), absl::SkipEmpty());
+
+ ASSIGN_OR_RETURN_ERRNO(entry.local_addr, AtoiBase(fields[1], 16));
+ ASSIGN_OR_RETURN_ERRNO(entry.local_port, AtoiBase(fields[2], 16));
+
+ ASSIGN_OR_RETURN_ERRNO(entry.remote_addr, AtoiBase(fields[3], 16));
+ ASSIGN_OR_RETURN_ERRNO(entry.remote_port, AtoiBase(fields[4], 16));
+
+ ASSIGN_OR_RETURN_ERRNO(entry.state, AtoiBase(fields[5], 16));
+ ASSIGN_OR_RETURN_ERRNO(entry.uid, Atoi<uint64_t>(fields[11]));
+ ASSIGN_OR_RETURN_ERRNO(entry.inode, Atoi<uint64_t>(fields[13]));
+
+ entries.push_back(entry);
+ }
+ std::cerr << "<end of /proc/net/tcp>" << std::endl;
+
+ return entries;
+}
+
+TEST(ProcNetTCP, Exists) {
+ const std::string content =
+ ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/tcp"));
+ const std::string header_line = StrCat(kProcNetTCPHeader, "\n");
+ if (IsRunningOnGvisor()) {
+ // Should be just the header since we don't have any tcp sockets yet.
+ EXPECT_EQ(content, header_line);
+ } else {
+ // On a general linux machine, we could have abitrary sockets on the system,
+ // so just check the header.
+ EXPECT_THAT(content, ::testing::StartsWith(header_line));
+ }
+}
+
+TEST(ProcNetTCP, EntryUID) {
+ auto sockets =
+ ASSERT_NO_ERRNO_AND_VALUE(IPv4TCPAcceptBindSocketPair(0).Create());
+ std::vector<TCPEntry> entries =
+ ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCPEntries());
+ TCPEntry e;
+ EXPECT_TRUE(FindByLocalAddr(entries, &e, sockets->first_addr()));
+ EXPECT_EQ(e.uid, geteuid());
+ EXPECT_TRUE(FindByRemoteAddr(entries, &e, sockets->first_addr()));
+ EXPECT_EQ(e.uid, geteuid());
+}
+
+TEST(ProcNetTCP, BindAcceptConnect) {
+ auto sockets =
+ ASSERT_NO_ERRNO_AND_VALUE(IPv4TCPAcceptBindSocketPair(0).Create());
+ std::vector<TCPEntry> entries =
+ ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCPEntries());
+ // We can only make assertions about the total number of entries if we control
+ // the entire "machine".
+ if (IsRunningOnGvisor()) {
+ EXPECT_EQ(entries.size(), 2);
+ }
+
+ TCPEntry e;
+ EXPECT_TRUE(FindByLocalAddr(entries, &e, sockets->first_addr()));
+ EXPECT_TRUE(FindByRemoteAddr(entries, &e, sockets->first_addr()));
+}
+
+TEST(ProcNetTCP, InodeReasonable) {
+ auto sockets =
+ ASSERT_NO_ERRNO_AND_VALUE(IPv4TCPAcceptBindSocketPair(0).Create());
+ std::vector<TCPEntry> entries =
+ ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCPEntries());
+
+ TCPEntry accepted_entry;
+ ASSERT_TRUE(FindByLocalAddr(entries, &accepted_entry, sockets->first_addr()));
+ EXPECT_NE(accepted_entry.inode, 0);
+
+ TCPEntry client_entry;
+ ASSERT_TRUE(FindByRemoteAddr(entries, &client_entry, sockets->first_addr()));
+ EXPECT_NE(client_entry.inode, 0);
+ EXPECT_NE(accepted_entry.inode, client_entry.inode);
+}
+
+TEST(ProcNetTCP, State) {
+ std::unique_ptr<FileDescriptor> server =
+ ASSERT_NO_ERRNO_AND_VALUE(IPv4TCPUnboundSocket(0).Create());
+
+ auto test_addr = V4Loopback();
+ ASSERT_THAT(
+ bind(server->get(), reinterpret_cast<struct sockaddr*>(&test_addr.addr),
+ test_addr.addr_len),
+ SyscallSucceeds());
+
+ struct sockaddr addr;
+ socklen_t addrlen = sizeof(struct sockaddr);
+ ASSERT_THAT(getsockname(server->get(), &addr, &addrlen), SyscallSucceeds());
+ ASSERT_EQ(addrlen, sizeof(struct sockaddr));
+
+ ASSERT_THAT(listen(server->get(), 10), SyscallSucceeds());
+ std::vector<TCPEntry> entries =
+ ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCPEntries());
+ TCPEntry listen_entry;
+ ASSERT_TRUE(FindByLocalAddr(entries, &listen_entry, &addr));
+ EXPECT_EQ(listen_entry.state, TCP_LISTEN);
+
+ std::unique_ptr<FileDescriptor> client =
+ ASSERT_NO_ERRNO_AND_VALUE(IPv4TCPUnboundSocket(0).Create());
+ ASSERT_THAT(connect(client->get(), &addr, addrlen), SyscallSucceeds());
+ entries = ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCPEntries());
+ ASSERT_TRUE(FindByLocalAddr(entries, &listen_entry, &addr));
+ EXPECT_EQ(listen_entry.state, TCP_LISTEN);
+ TCPEntry client_entry;
+ ASSERT_TRUE(FindByRemoteAddr(entries, &client_entry, &addr));
+ EXPECT_EQ(client_entry.state, TCP_ESTABLISHED);
+
+ FileDescriptor accepted =
+ ASSERT_NO_ERRNO_AND_VALUE(Accept(server->get(), nullptr, nullptr));
+
+ const uint32_t accepted_local_host = IP(&addr);
+ const uint16_t accepted_local_port = Port(&addr);
+
+ entries = ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCPEntries());
+ TCPEntry accepted_entry;
+ ASSERT_TRUE(FindBy(entries, &accepted_entry,
+ [client_entry, accepted_local_host,
+ accepted_local_port](const TCPEntry& e) {
+ return e.local_addr == accepted_local_host &&
+ e.local_port == accepted_local_port &&
+ e.remote_addr == client_entry.local_addr &&
+ e.remote_port == client_entry.local_port;
+ }));
+ EXPECT_EQ(accepted_entry.state, TCP_ESTABLISHED);
+}
+
+} // namespace
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/proc_net_unix.cc b/test/syscalls/linux/proc_net_unix.cc
index 82d325c17..9b9be66ff 100644
--- a/test/syscalls/linux/proc_net_unix.cc
+++ b/test/syscalls/linux/proc_net_unix.cc
@@ -67,7 +67,7 @@ std::string ExtractPath(const struct sockaddr* addr) {
// Abstract socket paths are null padded to the end of the struct
// sockaddr. However, these null bytes may or may not show up in
// /proc/net/unix depending on the kernel version. Truncate after the first
- // null byte (by treating path as a c-std::string).
+ // null byte (by treating path as a c-string).
return StrCat("@", &path[1]);
}
return std::string(path);
@@ -80,7 +80,7 @@ PosixErrorOr<std::vector<UnixEntry>> ProcNetUnixEntries() {
bool skipped_header = false;
std::vector<UnixEntry> entries;
- std::vector<std::string> lines = absl::StrSplit(content, absl::ByAnyChar("\n"));
+ std::vector<std::string> lines = absl::StrSplit(content, '\n');
std::cerr << "<contents of /proc/net/unix>" << std::endl;
for (std::string line : lines) {
// Emit the proc entry to the test output to provide context for the test
@@ -123,7 +123,8 @@ PosixErrorOr<std::vector<UnixEntry>> ProcNetUnixEntries() {
UnixEntry entry;
// Process the first 6 fields, up to but not including "Inode".
- std::vector<std::string> fields = absl::StrSplit(line, absl::MaxSplits(' ', 6));
+ std::vector<std::string> fields =
+ absl::StrSplit(line, absl::MaxSplits(' ', 6));
if (fields.size() < 7) {
return PosixError(EINVAL, StrFormat("Invalid entry: '%s'\n", line));
@@ -162,7 +163,7 @@ PosixErrorOr<std::vector<UnixEntry>> ProcNetUnixEntries() {
// Finds the first entry in 'entries' for which 'predicate' returns true.
// Returns true on match, and sets 'match' to point to the matching entry.
bool FindBy(std::vector<UnixEntry> entries, UnixEntry* match,
- std::function<bool(UnixEntry)> predicate) {
+ std::function<bool(const UnixEntry&)> predicate) {
for (int i = 0; i < entries.size(); ++i) {
if (predicate(entries[i])) {
*match = entries[i];
@@ -174,7 +175,8 @@ bool FindBy(std::vector<UnixEntry> entries, UnixEntry* match,
bool FindByPath(std::vector<UnixEntry> entries, UnixEntry* match,
const std::string& path) {
- return FindBy(entries, match, [path](UnixEntry e) { return e.path == path; });
+ return FindBy(entries, match,
+ [path](const UnixEntry& e) { return e.path == path; });
}
TEST(ProcNetUnix, Exists) {
diff --git a/test/syscalls/linux/pty.cc b/test/syscalls/linux/pty.cc
index f926ac0f9..d1ab4703f 100644
--- a/test/syscalls/linux/pty.cc
+++ b/test/syscalls/linux/pty.cc
@@ -105,7 +105,7 @@ struct Field {
uint64_t value;
};
-// ParseFields returns a std::string representation of value, using the names in
+// ParseFields returns a string representation of value, using the names in
// fields.
std::string ParseFields(const Field* fields, size_t len, uint64_t value) {
bool first = true;
diff --git a/test/syscalls/linux/raw_socket_icmp.cc b/test/syscalls/linux/raw_socket_icmp.cc
index 24d9dc79a..7fe7c03a5 100644
--- a/test/syscalls/linux/raw_socket_icmp.cc
+++ b/test/syscalls/linux/raw_socket_icmp.cc
@@ -234,7 +234,7 @@ TEST_F(RawSocketICMPTest, MultipleSocketReceive) {
}
// A raw ICMP socket and ping socket should both receive the ICMP packets
-// indended for the ping socket.
+// intended for the ping socket.
TEST_F(RawSocketICMPTest, RawAndPingSockets) {
SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
diff --git a/test/syscalls/linux/readv_common.h b/test/syscalls/linux/readv_common.h
index b16179fca..2fa40c35f 100644
--- a/test/syscalls/linux/readv_common.h
+++ b/test/syscalls/linux/readv_common.h
@@ -20,7 +20,7 @@
namespace gvisor {
namespace testing {
-// A NUL-terminated std::string containing the data used by tests using the following
+// A NUL-terminated string containing the data used by tests using the following
// test helpers.
extern const char kReadvTestData[];
diff --git a/test/syscalls/linux/sendfile.cc b/test/syscalls/linux/sendfile.cc
index 2fbb3f4ef..e5d72e28a 100644
--- a/test/syscalls/linux/sendfile.cc
+++ b/test/syscalls/linux/sendfile.cc
@@ -135,7 +135,7 @@ TEST(SendFileTest, SendTriviallyWithBothFilesReadWrite) {
TEST(SendFileTest, SendAndUpdateFileOffset) {
// Create temp files.
- // Test input std::string length must be > 2 AND even.
+ // Test input string length must be > 2 AND even.
constexpr char kData[] = "The slings and arrows of outrageous fortune,";
constexpr int kDataSize = sizeof(kData) - 1;
constexpr int kHalfDataSize = kDataSize / 2;
@@ -180,7 +180,7 @@ TEST(SendFileTest, SendAndUpdateFileOffset) {
TEST(SendFileTest, SendAndUpdateFileOffsetFromNonzeroStartingPoint) {
// Create temp files.
- // Test input std::string length must be > 2 AND divisible by 4.
+ // Test input string length must be > 2 AND divisible by 4.
constexpr char kData[] = "The slings and arrows of outrageous fortune,";
constexpr int kDataSize = sizeof(kData) - 1;
constexpr int kHalfDataSize = kDataSize / 2;
@@ -233,7 +233,7 @@ TEST(SendFileTest, SendAndUpdateFileOffsetFromNonzeroStartingPoint) {
TEST(SendFileTest, SendAndUpdateGivenOffset) {
// Create temp files.
- // Test input std::string length must be >= 4 AND divisible by 4.
+ // Test input string length must be >= 4 AND divisible by 4.
constexpr char kData[] = "Or to take Arms against a Sea of troubles,";
constexpr int kDataSize = sizeof(kData) + 1;
constexpr int kHalfDataSize = kDataSize / 2;
diff --git a/test/syscalls/linux/sendfile_socket.cc b/test/syscalls/linux/sendfile_socket.cc
index 66adda515..1c56540bc 100644
--- a/test/syscalls/linux/sendfile_socket.cc
+++ b/test/syscalls/linux/sendfile_socket.cc
@@ -33,9 +33,69 @@ namespace gvisor {
namespace testing {
namespace {
+class SendFileTest : public ::testing::TestWithParam<int> {
+ protected:
+ PosixErrorOr<std::tuple<int, int>> Sockets() {
+ // Bind a server socket.
+ int family = GetParam();
+ struct sockaddr server_addr = {};
+ switch (family) {
+ case AF_INET: {
+ struct sockaddr_in *server_addr_in =
+ reinterpret_cast<struct sockaddr_in *>(&server_addr);
+ server_addr_in->sin_family = family;
+ server_addr_in->sin_addr.s_addr = INADDR_ANY;
+ break;
+ }
+ case AF_UNIX: {
+ struct sockaddr_un *server_addr_un =
+ reinterpret_cast<struct sockaddr_un *>(&server_addr);
+ server_addr_un->sun_family = family;
+ server_addr_un->sun_path[0] = '\0';
+ break;
+ }
+ default:
+ return PosixError(EINVAL);
+ }
+ int server = socket(family, SOCK_STREAM, 0);
+ if (bind(server, &server_addr, sizeof(server_addr)) < 0) {
+ return PosixError(errno);
+ }
+ if (listen(server, 1) < 0) {
+ close(server);
+ return PosixError(errno);
+ }
+
+ // Fetch the address; both are anonymous.
+ socklen_t length = sizeof(server_addr);
+ if (getsockname(server, &server_addr, &length) < 0) {
+ close(server);
+ return PosixError(errno);
+ }
+
+ // Connect the client.
+ int client = socket(family, SOCK_STREAM, 0);
+ if (connect(client, &server_addr, length) < 0) {
+ close(server);
+ close(client);
+ return PosixError(errno);
+ }
+
+ // Accept on the server.
+ int server_client = accept(server, nullptr, 0);
+ if (server_client < 0) {
+ close(server);
+ close(client);
+ return PosixError(errno);
+ }
+ close(server);
+ return std::make_tuple(client, server_client);
+ }
+};
+
// Sends large file to exercise the path that read and writes data multiple
// times, esp. when more data is read than can be written.
-TEST(SendFileTest, SendMultiple) {
+TEST_P(SendFileTest, SendMultiple) {
std::vector<char> data(5 * 1024 * 1024);
RandomizeBuffer(data.data(), data.size());
@@ -45,34 +105,20 @@ TEST(SendFileTest, SendMultiple) {
TempPath::kDefaultFileMode));
const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
- // Use a socket for target file to make the write window small.
- const FileDescriptor server(socket(AF_INET, SOCK_STREAM, IPPROTO_TCP));
- ASSERT_THAT(server.get(), SyscallSucceeds());
-
- struct sockaddr_in server_addr = {};
- server_addr.sin_family = AF_INET;
- server_addr.sin_addr.s_addr = INADDR_ANY;
- ASSERT_THAT(
- bind(server.get(), reinterpret_cast<struct sockaddr *>(&server_addr),
- sizeof(server_addr)),
- SyscallSucceeds());
- ASSERT_THAT(listen(server.get(), 1), SyscallSucceeds());
+ // Create sockets.
+ std::tuple<int, int> fds = ASSERT_NO_ERRNO_AND_VALUE(Sockets());
+ const FileDescriptor server(std::get<0>(fds));
+ FileDescriptor client(std::get<1>(fds)); // non-const, reset is used.
// Thread that reads data from socket and dumps to a file.
- ScopedThread th([&server, &out_file, &server_addr] {
- socklen_t addrlen = sizeof(server_addr);
- const FileDescriptor fd(RetryEINTR(accept)(
- server.get(), reinterpret_cast<struct sockaddr *>(&server_addr),
- &addrlen));
- ASSERT_THAT(fd.get(), SyscallSucceeds());
-
+ ScopedThread th([&] {
FileDescriptor outf =
ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY));
// Read until socket is closed.
char buf[10240];
for (int cnt = 0;; cnt++) {
- int r = RetryEINTR(read)(fd.get(), buf, sizeof(buf));
+ int r = RetryEINTR(read)(server.get(), buf, sizeof(buf));
// We cannot afford to save on every read() call.
if (cnt % 1000 == 0) {
ASSERT_THAT(r, SyscallSucceeds());
@@ -99,25 +145,6 @@ TEST(SendFileTest, SendMultiple) {
const FileDescriptor inf =
ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY));
- FileDescriptor outf(socket(AF_INET, SOCK_STREAM, IPPROTO_TCP));
- ASSERT_THAT(outf.get(), SyscallSucceeds());
-
- // Get the port bound by the listening socket.
- socklen_t addrlen = sizeof(server_addr);
- ASSERT_THAT(getsockname(server.get(),
- reinterpret_cast<sockaddr *>(&server_addr), &addrlen),
- SyscallSucceeds());
-
- struct sockaddr_in addr = {};
- addr.sin_family = AF_INET;
- addr.sin_addr.s_addr = inet_addr("127.0.0.1");
- addr.sin_port = server_addr.sin_port;
- std::cout << "Connecting on port=" << server_addr.sin_port;
- ASSERT_THAT(
- RetryEINTR(connect)(
- outf.get(), reinterpret_cast<struct sockaddr *>(&addr), sizeof(addr)),
- SyscallSucceeds());
-
int cnt = 0;
for (size_t sent = 0; sent < data.size(); cnt++) {
const size_t remain = data.size() - sent;
@@ -125,7 +152,7 @@ TEST(SendFileTest, SendMultiple) {
<< ", remain=" << remain;
// Send data and verify that sendfile returns the correct value.
- int res = sendfile(outf.get(), inf.get(), nullptr, remain);
+ int res = sendfile(client.get(), inf.get(), nullptr, remain);
// We cannot afford to save on every sendfile() call.
if (cnt % 120 == 0) {
MaybeSave();
@@ -142,17 +169,74 @@ TEST(SendFileTest, SendMultiple) {
}
// Close socket to stop thread.
- outf.reset();
+ client.reset();
th.Join();
// Verify that the output file has the correct data.
- outf = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDONLY));
+ const FileDescriptor outf =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDONLY));
std::vector<char> actual(data.size(), '\0');
ASSERT_THAT(RetryEINTR(read)(outf.get(), actual.data(), actual.size()),
SyscallSucceedsWithValue(actual.size()));
ASSERT_EQ(memcmp(data.data(), actual.data(), data.size()), 0);
}
+TEST_P(SendFileTest, Shutdown) {
+ // Create a socket.
+ std::tuple<int, int> fds = ASSERT_NO_ERRNO_AND_VALUE(Sockets());
+ const FileDescriptor client(std::get<0>(fds));
+ FileDescriptor server(std::get<1>(fds)); // non-const, released below.
+
+ // If this is a TCP socket, then turn off linger.
+ if (GetParam() == AF_INET) {
+ struct linger sl;
+ sl.l_onoff = 1;
+ sl.l_linger = 0;
+ ASSERT_THAT(
+ setsockopt(server.get(), SOL_SOCKET, SO_LINGER, &sl, sizeof(sl)),
+ SyscallSucceeds());
+ }
+
+ // Create a 1m file with random data.
+ std::vector<char> data(1024 * 1024);
+ RandomizeBuffer(data.data(), data.size());
+ const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), absl::string_view(data.data(), data.size()),
+ TempPath::kDefaultFileMode));
+ const FileDescriptor inf =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY));
+
+ // Read some data, then shutdown the socket. We don't actually care about
+ // checking the contents (other tests do that), so we just re-use the same
+ // buffer as above.
+ ScopedThread t([&]() {
+ int done = 0;
+ while (done < data.size()) {
+ int n = read(server.get(), data.data(), data.size());
+ ASSERT_THAT(n, SyscallSucceeds());
+ done += n;
+ }
+ // Close the server side socket.
+ ASSERT_THAT(close(server.release()), SyscallSucceeds());
+ });
+
+ // Continuously stream from the file to the socket. Note we do not assert
+ // that a specific amount of data has been written at any time, just that some
+ // data is written. Eventually, we should get a connection reset error.
+ while (1) {
+ off_t offset = 0; // Always read from the start.
+ int n = sendfile(client.get(), inf.get(), &offset, data.size());
+ EXPECT_THAT(n, AnyOf(SyscallFailsWithErrno(ECONNRESET),
+ SyscallFailsWithErrno(EPIPE), SyscallSucceeds()));
+ if (n <= 0) {
+ break;
+ }
+ }
+}
+
+INSTANTIATE_TEST_SUITE_P(AddressFamily, SendFileTest,
+ ::testing::Values(AF_UNIX, AF_INET));
+
} // namespace
} // namespace testing
} // namespace gvisor
diff --git a/test/syscalls/linux/socket_generic.cc b/test/syscalls/linux/socket_generic.cc
index f99f3fe62..51d614639 100644
--- a/test/syscalls/linux/socket_generic.cc
+++ b/test/syscalls/linux/socket_generic.cc
@@ -21,6 +21,7 @@
#include "gtest/gtest.h"
#include "gtest/gtest.h"
+#include "absl/strings/str_format.h"
#include "absl/strings/string_view.h"
#include "test/syscalls/linux/socket_test_util.h"
#include "test/syscalls/linux/unix_domain_socket_test_util.h"
@@ -687,5 +688,54 @@ TEST_P(AllSocketPairTest, RecvTimeoutWaitAll) {
EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
}
+TEST_P(AllSocketPairTest, GetSockoptType) {
+ int type = GetParam().type;
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ for (const int fd : {sockets->first_fd(), sockets->second_fd()}) {
+ int opt;
+ socklen_t optlen = sizeof(opt);
+ EXPECT_THAT(getsockopt(fd, SOL_SOCKET, SO_TYPE, &opt, &optlen),
+ SyscallSucceeds());
+
+ // Type may have SOCK_NONBLOCK and SOCK_CLOEXEC ORed into it. Remove these
+ // before comparison.
+ type &= ~(SOCK_NONBLOCK | SOCK_CLOEXEC);
+ EXPECT_EQ(opt, type) << absl::StrFormat(
+ "getsockopt(%d, SOL_SOCKET, SO_TYPE, &opt, &optlen) => opt=%d was "
+ "unexpected",
+ fd, opt);
+ }
+}
+
+TEST_P(AllSocketPairTest, GetSockoptDomain) {
+ const int domain = GetParam().domain;
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ for (const int fd : {sockets->first_fd(), sockets->second_fd()}) {
+ int opt;
+ socklen_t optlen = sizeof(opt);
+ EXPECT_THAT(getsockopt(fd, SOL_SOCKET, SO_DOMAIN, &opt, &optlen),
+ SyscallSucceeds());
+ EXPECT_EQ(opt, domain) << absl::StrFormat(
+ "getsockopt(%d, SOL_SOCKET, SO_DOMAIN, &opt, &optlen) => opt=%d was "
+ "unexpected",
+ fd, opt);
+ }
+}
+
+TEST_P(AllSocketPairTest, GetSockoptProtocol) {
+ const int protocol = GetParam().protocol;
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ for (const int fd : {sockets->first_fd(), sockets->second_fd()}) {
+ int opt;
+ socklen_t optlen = sizeof(opt);
+ EXPECT_THAT(getsockopt(fd, SOL_SOCKET, SO_PROTOCOL, &opt, &optlen),
+ SyscallSucceeds());
+ EXPECT_EQ(opt, protocol) << absl::StrFormat(
+ "getsockopt(%d, SOL_SOCKET, SO_PROTOCOL, &opt, &optlen) => opt=%d was "
+ "unexpected",
+ fd, opt);
+ }
+}
+
} // namespace testing
} // namespace gvisor
diff --git a/test/syscalls/linux/socket_ip_tcp_generic.cc b/test/syscalls/linux/socket_ip_tcp_generic.cc
index 5b198f49d..a43cf9bce 100644
--- a/test/syscalls/linux/socket_ip_tcp_generic.cc
+++ b/test/syscalls/linux/socket_ip_tcp_generic.cc
@@ -592,5 +592,110 @@ TEST_P(TCPSocketPairTest, MsgTruncMsgPeek) {
EXPECT_EQ(0, memcmp(received_data2, sent_data, sizeof(sent_data)));
}
+TEST_P(TCPSocketPairTest, SetCongestionControlSucceedsForSupported) {
+ // This is Linux's net/tcp.h TCP_CA_NAME_MAX.
+ const int kTcpCaNameMax = 16;
+
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ // Netstack only supports reno & cubic so we only test these two values here.
+ {
+ const char kSetCC[kTcpCaNameMax] = "reno";
+ ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CONGESTION,
+ &kSetCC, strlen(kSetCC)),
+ SyscallSucceedsWithValue(0));
+
+ char got_cc[kTcpCaNameMax];
+ memset(got_cc, '1', sizeof(got_cc));
+ socklen_t optlen = sizeof(got_cc);
+ ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CONGESTION,
+ &got_cc, &optlen),
+ SyscallSucceedsWithValue(0));
+ EXPECT_EQ(0, memcmp(got_cc, kSetCC, sizeof(kSetCC)));
+ }
+ {
+ const char kSetCC[kTcpCaNameMax] = "cubic";
+ ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CONGESTION,
+ &kSetCC, strlen(kSetCC)),
+ SyscallSucceedsWithValue(0));
+
+ char got_cc[kTcpCaNameMax];
+ memset(got_cc, '1', sizeof(got_cc));
+ socklen_t optlen = sizeof(got_cc);
+ ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CONGESTION,
+ &got_cc, &optlen),
+ SyscallSucceedsWithValue(0));
+ EXPECT_EQ(0, memcmp(got_cc, kSetCC, sizeof(kSetCC)));
+ }
+}
+
+TEST_P(TCPSocketPairTest, SetGetTCPCongestionShortReadBuffer) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ {
+ // Verify that getsockopt/setsockopt work with buffers smaller than
+ // kTcpCaNameMax.
+ const char kSetCC[] = "cubic";
+ ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CONGESTION,
+ &kSetCC, strlen(kSetCC)),
+ SyscallSucceedsWithValue(0));
+
+ char got_cc[sizeof(kSetCC)];
+ socklen_t optlen = sizeof(got_cc);
+ ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CONGESTION,
+ &got_cc, &optlen),
+ SyscallSucceedsWithValue(0));
+ EXPECT_EQ(0, memcmp(got_cc, kSetCC, sizeof(got_cc)));
+ }
+}
+
+TEST_P(TCPSocketPairTest, SetGetTCPCongestionLargeReadBuffer) {
+ // This is Linux's net/tcp.h TCP_CA_NAME_MAX.
+ const int kTcpCaNameMax = 16;
+
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ {
+ // Verify that getsockopt works with buffers larger than
+ // kTcpCaNameMax.
+ const char kSetCC[] = "cubic";
+ ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CONGESTION,
+ &kSetCC, strlen(kSetCC)),
+ SyscallSucceedsWithValue(0));
+
+ char got_cc[kTcpCaNameMax + 5];
+ socklen_t optlen = sizeof(got_cc);
+ ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CONGESTION,
+ &got_cc, &optlen),
+ SyscallSucceedsWithValue(0));
+ // Linux copies the minimum of kTcpCaNameMax or the length of the passed in
+ // buffer and sets optlen to the number of bytes actually copied
+ // irrespective of the actual length of the congestion control name.
+ EXPECT_EQ(kTcpCaNameMax, optlen);
+ EXPECT_EQ(0, memcmp(got_cc, kSetCC, sizeof(kSetCC)));
+ }
+}
+
+TEST_P(TCPSocketPairTest, SetCongestionControlFailsForUnsupported) {
+ // This is Linux's net/tcp.h TCP_CA_NAME_MAX.
+ const int kTcpCaNameMax = 16;
+
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ char old_cc[kTcpCaNameMax];
+ socklen_t optlen = sizeof(old_cc);
+ ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CONGESTION,
+ &old_cc, &optlen),
+ SyscallSucceedsWithValue(0));
+
+ const char kSetCC[] = "invalid_ca_cc";
+ ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CONGESTION,
+ &kSetCC, strlen(kSetCC)),
+ SyscallFailsWithErrno(ENOENT));
+
+ char got_cc[kTcpCaNameMax];
+ optlen = sizeof(got_cc);
+ ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_CONGESTION,
+ &got_cc, &optlen),
+ SyscallSucceedsWithValue(0));
+ EXPECT_EQ(0, memcmp(got_cc, old_cc, sizeof(old_cc)));
+}
+
} // namespace testing
} // namespace gvisor
diff --git a/test/syscalls/linux/socket_netlink_route.cc b/test/syscalls/linux/socket_netlink_route.cc
index c8693225f..b5c38f27e 100644
--- a/test/syscalls/linux/socket_netlink_route.cc
+++ b/test/syscalls/linux/socket_netlink_route.cc
@@ -23,6 +23,7 @@
#include <vector>
#include "gtest/gtest.h"
+#include "absl/strings/str_format.h"
#include "test/syscalls/linux/socket_netlink_util.h"
#include "test/syscalls/linux/socket_test_util.h"
#include "test/util/cleanup.h"
@@ -144,24 +145,56 @@ TEST(NetlinkRouteTest, GetPeerName) {
EXPECT_EQ(addr.nl_pid, 0);
}
-using IntSockOptTest = ::testing::TestWithParam<int>;
+// Parameters for GetSockOpt test. They are:
+// 0: Socket option to query.
+// 1: A predicate to run on the returned sockopt value. Should return true if
+// the value is considered ok.
+// 2: A description of what the sockopt value is expected to be. Should complete
+// the sentence "<value> was unexpected, expected <description>"
+using SockOptTest = ::testing::TestWithParam<
+ std::tuple<int, std::function<bool(int)>, std::string>>;
+
+TEST_P(SockOptTest, GetSockOpt) {
+ int sockopt = std::get<0>(GetParam());
+ auto verifier = std::get<1>(GetParam());
+ std::string verifier_description = std::get<2>(GetParam());
-TEST_P(IntSockOptTest, GetSockOpt) {
FileDescriptor fd =
ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE));
int res;
socklen_t len = sizeof(res);
- EXPECT_THAT(getsockopt(fd.get(), SOL_SOCKET, GetParam(), &res, &len),
+ EXPECT_THAT(getsockopt(fd.get(), SOL_SOCKET, sockopt, &res, &len),
SyscallSucceeds());
EXPECT_EQ(len, sizeof(res));
- EXPECT_GT(res, 0);
+ EXPECT_TRUE(verifier(res)) << absl::StrFormat(
+ "getsockopt(%d, SOL_SOCKET, %d, &res, &len) => res=%d was unexpected, "
+ "expected %s",
+ fd.get(), sockopt, res, verifier_description);
+}
+
+std::function<bool(int)> IsPositive() {
+ return [](int val) { return val > 0; };
+}
+
+std::function<bool(int)> IsEqual(int target) {
+ return [target](int val) { return val == target; };
}
-INSTANTIATE_TEST_SUITE_P(NetlinkRouteTest, IntSockOptTest,
- ::testing::Values(SO_SNDBUF, SO_RCVBUF));
+INSTANTIATE_TEST_SUITE_P(
+ NetlinkRouteTest, SockOptTest,
+ ::testing::Values(
+ std::make_tuple(SO_SNDBUF, IsPositive(), "positive send buffer size"),
+ std::make_tuple(SO_RCVBUF, IsPositive(),
+ "positive receive buffer size"),
+ std::make_tuple(SO_TYPE, IsEqual(SOCK_RAW),
+ absl::StrFormat("SOCK_RAW (%d)", SOCK_RAW)),
+ std::make_tuple(SO_DOMAIN, IsEqual(AF_NETLINK),
+ absl::StrFormat("AF_NETLINK (%d)", AF_NETLINK)),
+ std::make_tuple(SO_PROTOCOL, IsEqual(NETLINK_ROUTE),
+ absl::StrFormat("NETLINK_ROUTE (%d)", NETLINK_ROUTE))));
// Validates the reponses to RTM_GETLINK + NLM_F_DUMP.
void CheckGetLinkResponse(const struct nlmsghdr* hdr, int seq, int port) {
diff --git a/test/syscalls/linux/socket_test_util.cc b/test/syscalls/linux/socket_test_util.cc
index da69de37c..4f65cf5ae 100644
--- a/test/syscalls/linux/socket_test_util.cc
+++ b/test/syscalls/linux/socket_test_util.cc
@@ -457,7 +457,8 @@ Creator<SocketPair> UDPUnboundSocketPairCreator(int domain, int type,
SocketPairKind Reversed(SocketPairKind const& base) {
auto const& creator = base.creator;
return SocketPairKind{
- absl::StrCat("reversed ", base.description),
+ absl::StrCat("reversed ", base.description), base.domain, base.type,
+ base.protocol,
[creator]() -> PosixErrorOr<std::unique_ptr<ReversedSocketPair>> {
ASSIGN_OR_RETURN_ERRNO(auto creator_value, creator());
return absl::make_unique<ReversedSocketPair>(std::move(creator_value));
@@ -542,8 +543,8 @@ struct sockaddr_storage AddrFDSocketPair::to_storage(const sockaddr_in6& addr) {
SocketKind SimpleSocket(int fam, int type, int proto) {
return SocketKind{
- absl::StrCat("Family ", fam, ", type ", type, ", proto ", proto),
- SyscallSocketCreator(fam, type, proto)};
+ absl::StrCat("Family ", fam, ", type ", type, ", proto ", proto), fam,
+ type, proto, SyscallSocketCreator(fam, type, proto)};
}
ssize_t SendLargeSendMsg(const std::unique_ptr<SocketPair>& sockets,
diff --git a/test/syscalls/linux/socket_test_util.h b/test/syscalls/linux/socket_test_util.h
index 058313986..4fd59767a 100644
--- a/test/syscalls/linux/socket_test_util.h
+++ b/test/syscalls/linux/socket_test_util.h
@@ -287,6 +287,9 @@ Creator<FileDescriptor> UnboundSocketCreator(int domain, int type,
// a function that creates such a socket pair.
struct SocketPairKind {
std::string description;
+ int domain;
+ int type;
+ int protocol;
Creator<SocketPair> creator;
// Create creates a socket pair of this kind.
@@ -297,6 +300,9 @@ struct SocketPairKind {
// a function that creates such a socket.
struct SocketKind {
std::string description;
+ int domain;
+ int type;
+ int protocol;
Creator<FileDescriptor> creator;
// Create creates a socket pair of this kind.
@@ -353,6 +359,7 @@ Middleware SetSockOpt(int level, int optname, T* value) {
return SocketPairKind{
absl::StrCat("setsockopt(", level, ", ", optname, ", ", *value, ") ",
base.description),
+ base.domain, base.type, base.protocol,
[creator, level, optname,
value]() -> PosixErrorOr<std::unique_ptr<SocketPair>> {
ASSIGN_OR_RETURN_ERRNO(auto creator_value, creator());
diff --git a/test/syscalls/linux/socket_unix_cmsg.cc b/test/syscalls/linux/socket_unix_cmsg.cc
index b0ab26847..1092e29b1 100644
--- a/test/syscalls/linux/socket_unix_cmsg.cc
+++ b/test/syscalls/linux/socket_unix_cmsg.cc
@@ -220,7 +220,7 @@ TEST_P(UnixSocketPairCmsgTest, BasicFDPassNoSpaceMsgCtrunc) {
// BasicFDPassNullControlMsgCtrunc sends an FD and sets contradictory values for
// msg_controllen and msg_control. msg_controllen is set to the correct size to
-// accomidate the FD, but msg_control is set to NULL. In this case, msg_control
+// accommodate the FD, but msg_control is set to NULL. In this case, msg_control
// should override msg_controllen.
TEST_P(UnixSocketPairCmsgTest, BasicFDPassNullControlMsgCtrunc) {
// FIXME(gvisor.dev/issue/207): Fix handling of NULL msg_control.
@@ -531,7 +531,7 @@ TEST_P(UnixSocketPairCmsgTest, FDPassInterspersed1) {
}
// FDPassInterspersed2 checks that sent control messages cannot be read after
-// their assocated data has been read while ignoring the control message by
+// their associated data has been read while ignoring the control message by
// using read(2) instead of recvmsg(2).
TEST_P(UnixSocketPairCmsgTest, FDPassInterspersed2) {
auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
diff --git a/test/syscalls/linux/socket_unix_unbound_stream.cc b/test/syscalls/linux/socket_unix_unbound_stream.cc
index 091d546b3..e483d2777 100644
--- a/test/syscalls/linux/socket_unix_unbound_stream.cc
+++ b/test/syscalls/linux/socket_unix_unbound_stream.cc
@@ -29,7 +29,7 @@ namespace {
using UnixStreamSocketPairTest = SocketPairTest;
// FDPassPartialRead checks that sent control messages cannot be read after
-// any of their assocated data has been read while ignoring the control message
+// any of their associated data has been read while ignoring the control message
// by using read(2) instead of recvmsg(2).
TEST_P(UnixStreamSocketPairTest, FDPassPartialRead) {
auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
diff --git a/test/syscalls/linux/stat.cc b/test/syscalls/linux/stat.cc
index 80ba67496..510f7bee5 100644
--- a/test/syscalls/linux/stat.cc
+++ b/test/syscalls/linux/stat.cc
@@ -16,7 +16,9 @@
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/statfs.h>
+#include <sys/types.h>
#include <unistd.h>
+
#include <string>
#include <vector>
@@ -554,6 +556,103 @@ TEST(SimpleStatTest, AnonDeviceAllocatesUniqueInodesAcrossSaveRestore) {
EXPECT_EQ(st2_after.st_ino, st2.st_ino);
}
+#ifndef SYS_statx
+#if defined(__x86_64__)
+#define SYS_statx 332
+#else
+#error "Unknown architecture"
+#endif
+#endif // SYS_statx
+
+#ifndef STATX_ALL
+#define STATX_ALL 0x00000fffU
+#endif // STATX_ALL
+
+// struct kernel_statx_timestamp is a Linux statx_timestamp struct.
+struct kernel_statx_timestamp {
+ int64_t tv_sec;
+ uint32_t tv_nsec;
+ int32_t __reserved;
+};
+
+// struct kernel_statx is a Linux statx struct. Old versions of glibc do not
+// expose it. See include/uapi/linux/stat.h
+struct kernel_statx {
+ uint32_t stx_mask;
+ uint32_t stx_blksize;
+ uint64_t stx_attributes;
+ uint32_t stx_nlink;
+ uint32_t stx_uid;
+ uint32_t stx_gid;
+ uint16_t stx_mode;
+ uint16_t __spare0[1];
+ uint64_t stx_ino;
+ uint64_t stx_size;
+ uint64_t stx_blocks;
+ uint64_t stx_attributes_mask;
+ struct kernel_statx_timestamp stx_atime;
+ struct kernel_statx_timestamp stx_btime;
+ struct kernel_statx_timestamp stx_ctime;
+ struct kernel_statx_timestamp stx_mtime;
+ uint32_t stx_rdev_major;
+ uint32_t stx_rdev_minor;
+ uint32_t stx_dev_major;
+ uint32_t stx_dev_minor;
+ uint64_t __spare2[14];
+};
+
+int statx(int dirfd, const char *pathname, int flags, unsigned int mask,
+ struct kernel_statx *statxbuf) {
+ return syscall(SYS_statx, dirfd, pathname, flags, mask, statxbuf);
+}
+
+TEST_F(StatTest, StatxAbsPath) {
+ SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, 0) < 0 &&
+ errno == ENOSYS);
+
+ struct kernel_statx stx;
+ EXPECT_THAT(statx(-1, test_file_name_.c_str(), 0, STATX_ALL, &stx),
+ SyscallSucceeds());
+ EXPECT_TRUE(S_ISREG(stx.stx_mode));
+}
+
+TEST_F(StatTest, StatxRelPathDirFD) {
+ SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, 0) < 0 &&
+ errno == ENOSYS);
+
+ struct kernel_statx stx;
+ auto const dirfd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(GetAbsoluteTestTmpdir(), O_RDONLY));
+ auto filename = std::string(Basename(test_file_name_));
+
+ EXPECT_THAT(statx(dirfd.get(), filename.c_str(), 0, STATX_ALL, &stx),
+ SyscallSucceeds());
+ EXPECT_TRUE(S_ISREG(stx.stx_mode));
+}
+
+TEST_F(StatTest, StatxRelPathCwd) {
+ SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, 0) < 0 &&
+ errno == ENOSYS);
+
+ ASSERT_THAT(chdir(GetAbsoluteTestTmpdir().c_str()), SyscallSucceeds());
+ auto filename = std::string(Basename(test_file_name_));
+ struct kernel_statx stx;
+ EXPECT_THAT(statx(AT_FDCWD, filename.c_str(), 0, STATX_ALL, &stx),
+ SyscallSucceeds());
+ EXPECT_TRUE(S_ISREG(stx.stx_mode));
+}
+
+TEST_F(StatTest, StatxEmptyPath) {
+ SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, 0) < 0 &&
+ errno == ENOSYS);
+
+ const auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDONLY));
+ struct kernel_statx stx;
+ EXPECT_THAT(statx(fd.get(), "", AT_EMPTY_PATH, STATX_ALL, &stx),
+ SyscallSucceeds());
+ EXPECT_TRUE(S_ISREG(stx.stx_mode));
+}
+
} // namespace
} // namespace testing
diff --git a/test/syscalls/linux/symlink.cc b/test/syscalls/linux/symlink.cc
index 494072a9b..b249ff91f 100644
--- a/test/syscalls/linux/symlink.cc
+++ b/test/syscalls/linux/symlink.cc
@@ -272,6 +272,105 @@ TEST(SymlinkTest, ChmodSymlink) {
EXPECT_EQ(FilePermission(newpath), 0777);
}
+class ParamSymlinkTest : public ::testing::TestWithParam<std::string> {};
+
+// Test that creating an existing symlink with creat will create the target.
+TEST_P(ParamSymlinkTest, CreatLinkCreatesTarget) {
+ const std::string target = GetParam();
+ const std::string linkpath = NewTempAbsPath();
+
+ ASSERT_THAT(symlink(target.c_str(), linkpath.c_str()), SyscallSucceeds());
+
+ int fd;
+ EXPECT_THAT(fd = creat(linkpath.c_str(), 0666), SyscallSucceeds());
+ ASSERT_THAT(close(fd), SyscallSucceeds());
+
+ ASSERT_THAT(chdir(GetAbsoluteTestTmpdir().c_str()), SyscallSucceeds());
+ struct stat st;
+ EXPECT_THAT(stat(target.c_str(), &st), SyscallSucceeds());
+
+ ASSERT_THAT(unlink(linkpath.c_str()), SyscallSucceeds());
+ ASSERT_THAT(unlink(target.c_str()), SyscallSucceeds());
+}
+
+// Test that opening an existing symlink with O_CREAT will create the target.
+TEST_P(ParamSymlinkTest, OpenLinkCreatesTarget) {
+ const std::string target = GetParam();
+ const std::string linkpath = NewTempAbsPath();
+
+ ASSERT_THAT(symlink(target.c_str(), linkpath.c_str()), SyscallSucceeds());
+
+ int fd;
+ EXPECT_THAT(fd = open(linkpath.c_str(), O_CREAT, 0666), SyscallSucceeds());
+ ASSERT_THAT(close(fd), SyscallSucceeds());
+
+ ASSERT_THAT(chdir(GetAbsoluteTestTmpdir().c_str()), SyscallSucceeds());
+ struct stat st;
+ EXPECT_THAT(stat(target.c_str(), &st), SyscallSucceeds());
+
+ ASSERT_THAT(unlink(linkpath.c_str()), SyscallSucceeds());
+ ASSERT_THAT(unlink(target.c_str()), SyscallSucceeds());
+}
+
+// Test that opening a self-symlink with O_CREAT will fail with ELOOP.
+TEST_P(ParamSymlinkTest, CreateExistingSelfLink) {
+ ASSERT_THAT(chdir(GetAbsoluteTestTmpdir().c_str()), SyscallSucceeds());
+
+ const std::string linkpath = GetParam();
+ ASSERT_THAT(symlink(linkpath.c_str(), linkpath.c_str()), SyscallSucceeds());
+
+ EXPECT_THAT(open(linkpath.c_str(), O_CREAT, 0666),
+ SyscallFailsWithErrno(ELOOP));
+
+ ASSERT_THAT(unlink(linkpath.c_str()), SyscallSucceeds());
+}
+
+// Test that opening a file that is a symlink to its parent directory fails
+// with ELOOP.
+TEST_P(ParamSymlinkTest, CreateExistingParentLink) {
+ ASSERT_THAT(chdir(GetAbsoluteTestTmpdir().c_str()), SyscallSucceeds());
+
+ const std::string linkpath = GetParam();
+ const std::string target = JoinPath(linkpath, "child");
+ ASSERT_THAT(symlink(target.c_str(), linkpath.c_str()), SyscallSucceeds());
+
+ EXPECT_THAT(open(linkpath.c_str(), O_CREAT, 0666),
+ SyscallFailsWithErrno(ELOOP));
+
+ ASSERT_THAT(unlink(linkpath.c_str()), SyscallSucceeds());
+}
+
+// Test that opening an existing symlink with O_CREAT|O_EXCL will fail with
+// EEXIST.
+TEST_P(ParamSymlinkTest, OpenLinkExclFails) {
+ const std::string target = GetParam();
+ const std::string linkpath = NewTempAbsPath();
+
+ ASSERT_THAT(symlink(target.c_str(), linkpath.c_str()), SyscallSucceeds());
+
+ EXPECT_THAT(open(linkpath.c_str(), O_CREAT | O_EXCL, 0666),
+ SyscallFailsWithErrno(EEXIST));
+
+ ASSERT_THAT(unlink(linkpath.c_str()), SyscallSucceeds());
+}
+
+// Test that opening an existing symlink with O_CREAT|O_NOFOLLOW will fail with
+// ELOOP.
+TEST_P(ParamSymlinkTest, OpenLinkNoFollowFails) {
+ const std::string target = GetParam();
+ const std::string linkpath = NewTempAbsPath();
+
+ ASSERT_THAT(symlink(target.c_str(), linkpath.c_str()), SyscallSucceeds());
+
+ EXPECT_THAT(open(linkpath.c_str(), O_CREAT | O_NOFOLLOW, 0666),
+ SyscallFailsWithErrno(ELOOP));
+
+ ASSERT_THAT(unlink(linkpath.c_str()), SyscallSucceeds());
+}
+
+INSTANTIATE_TEST_SUITE_P(AbsAndRelTarget, ParamSymlinkTest,
+ ::testing::Values(NewTempAbsPath(), NewTempRelPath()));
+
} // namespace
} // namespace testing
diff --git a/test/syscalls/linux/tcp_socket.cc b/test/syscalls/linux/tcp_socket.cc
index e3f9f9f9d..8d77431f2 100644
--- a/test/syscalls/linux/tcp_socket.cc
+++ b/test/syscalls/linux/tcp_socket.cc
@@ -265,10 +265,16 @@ TEST_P(TcpSocketTest, BlockingLargeWrite_NoRandomSave) {
ScopedThread t([this, &read_bytes]() {
// Avoid interrupting the blocking write in main thread.
const DisableSave ds;
+
+ // Take ownership of the FD so that we close it on failure. This will
+ // unblock the blocking write below.
+ FileDescriptor fd(t_);
+ t_ = -1;
+
char readbuf[2500] = {};
int n = -1;
while (n != 0) {
- EXPECT_THAT(n = RetryEINTR(read)(t_, &readbuf, sizeof(readbuf)),
+ ASSERT_THAT(n = RetryEINTR(read)(fd.get(), &readbuf, sizeof(readbuf)),
SyscallSucceeds());
read_bytes += n;
}
@@ -342,10 +348,16 @@ TEST_P(TcpSocketTest, BlockingLargeSend_NoRandomSave) {
ScopedThread t([this, &read_bytes]() {
// Avoid interrupting the blocking write in main thread.
const DisableSave ds;
+
+ // Take ownership of the FD so that we close it on failure. This will
+ // unblock the blocking write below.
+ FileDescriptor fd(t_);
+ t_ = -1;
+
char readbuf[2500] = {};
int n = -1;
while (n != 0) {
- EXPECT_THAT(n = RetryEINTR(read)(t_, &readbuf, sizeof(readbuf)),
+ ASSERT_THAT(n = RetryEINTR(read)(fd.get(), &readbuf, sizeof(readbuf)),
SyscallSucceeds());
read_bytes += n;
}
@@ -751,6 +763,188 @@ TEST_P(SimpleTcpSocketTest, NonBlockingConnectRefused) {
EXPECT_THAT(close(s.release()), SyscallSucceeds());
}
+// Test that setting a supported congestion control algorithm succeeds for an
+// unconnected TCP socket
+TEST_P(SimpleTcpSocketTest, SetCongestionControlSucceedsForSupported) {
+ // This is Linux's net/tcp.h TCP_CA_NAME_MAX.
+ const int kTcpCaNameMax = 16;
+
+ FileDescriptor s =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+ {
+ const char kSetCC[kTcpCaNameMax] = "reno";
+ ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &kSetCC,
+ strlen(kSetCC)),
+ SyscallSucceedsWithValue(0));
+
+ char got_cc[kTcpCaNameMax];
+ memset(got_cc, '1', sizeof(got_cc));
+ socklen_t optlen = sizeof(got_cc);
+ ASSERT_THAT(
+ getsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &got_cc, &optlen),
+ SyscallSucceedsWithValue(0));
+ // We ignore optlen here as the linux kernel sets optlen to the lower of the
+ // size of the buffer passed in or kTcpCaNameMax and not the length of the
+ // congestion control algorithm's actual name.
+ EXPECT_EQ(0, memcmp(got_cc, kSetCC, sizeof(kTcpCaNameMax)));
+ }
+ {
+ const char kSetCC[kTcpCaNameMax] = "cubic";
+ ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &kSetCC,
+ strlen(kSetCC)),
+ SyscallSucceedsWithValue(0));
+
+ char got_cc[kTcpCaNameMax];
+ memset(got_cc, '1', sizeof(got_cc));
+ socklen_t optlen = sizeof(got_cc);
+ ASSERT_THAT(
+ getsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &got_cc, &optlen),
+ SyscallSucceedsWithValue(0));
+ // We ignore optlen here as the linux kernel sets optlen to the lower of the
+ // size of the buffer passed in or kTcpCaNameMax and not the length of the
+ // congestion control algorithm's actual name.
+ EXPECT_EQ(0, memcmp(got_cc, kSetCC, sizeof(kTcpCaNameMax)));
+ }
+}
+
+// This test verifies that a getsockopt(...TCP_CONGESTION) behaviour is
+// consistent between linux and gvisor when the passed in buffer is smaller than
+// kTcpCaNameMax.
+TEST_P(SimpleTcpSocketTest, SetGetTCPCongestionShortReadBuffer) {
+ FileDescriptor s =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+ {
+ // Verify that getsockopt/setsockopt work with buffers smaller than
+ // kTcpCaNameMax.
+ const char kSetCC[] = "cubic";
+ ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &kSetCC,
+ strlen(kSetCC)),
+ SyscallSucceedsWithValue(0));
+
+ char got_cc[sizeof(kSetCC)];
+ socklen_t optlen = sizeof(got_cc);
+ ASSERT_THAT(
+ getsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &got_cc, &optlen),
+ SyscallSucceedsWithValue(0));
+ EXPECT_EQ(sizeof(got_cc), optlen);
+ EXPECT_EQ(0, memcmp(got_cc, kSetCC, sizeof(got_cc)));
+ }
+}
+
+// This test verifies that a getsockopt(...TCP_CONGESTION) behaviour is
+// consistent between linux and gvisor when the passed in buffer is larger than
+// kTcpCaNameMax.
+TEST_P(SimpleTcpSocketTest, SetGetTCPCongestionLargeReadBuffer) {
+ // This is Linux's net/tcp.h TCP_CA_NAME_MAX.
+ const int kTcpCaNameMax = 16;
+
+ FileDescriptor s =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+ {
+ // Verify that getsockopt works with buffers larger than
+ // kTcpCaNameMax.
+ const char kSetCC[] = "cubic";
+ ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &kSetCC,
+ strlen(kSetCC)),
+ SyscallSucceedsWithValue(0));
+
+ char got_cc[kTcpCaNameMax + 5];
+ socklen_t optlen = sizeof(got_cc);
+ ASSERT_THAT(
+ getsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &got_cc, &optlen),
+ SyscallSucceedsWithValue(0));
+ // Linux copies the minimum of kTcpCaNameMax or the length of the passed in
+ // buffer and sets optlen to the number of bytes actually copied
+ // irrespective of the actual length of the congestion control name.
+ EXPECT_EQ(kTcpCaNameMax, optlen);
+ EXPECT_EQ(0, memcmp(got_cc, kSetCC, sizeof(kSetCC)));
+ }
+}
+
+// Test that setting an unsupported congestion control algorithm fails for an
+// unconnected TCP socket.
+TEST_P(SimpleTcpSocketTest, SetCongestionControlFailsForUnsupported) {
+ // This is Linux's net/tcp.h TCP_CA_NAME_MAX.
+ const int kTcpCaNameMax = 16;
+
+ FileDescriptor s =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+ char old_cc[kTcpCaNameMax];
+ socklen_t optlen = sizeof(old_cc);
+ ASSERT_THAT(
+ getsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &old_cc, &optlen),
+ SyscallSucceedsWithValue(0));
+
+ const char kSetCC[] = "invalid_ca_kSetCC";
+ ASSERT_THAT(
+ setsockopt(s.get(), SOL_TCP, TCP_CONGESTION, &kSetCC, strlen(kSetCC)),
+ SyscallFailsWithErrno(ENOENT));
+
+ char got_cc[kTcpCaNameMax];
+ ASSERT_THAT(
+ getsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &got_cc, &optlen),
+ SyscallSucceedsWithValue(0));
+ // We ignore optlen here as the linux kernel sets optlen to the lower of the
+ // size of the buffer passed in or kTcpCaNameMax and not the length of the
+ // congestion control algorithm's actual name.
+ EXPECT_EQ(0, memcmp(got_cc, old_cc, sizeof(kTcpCaNameMax)));
+}
+
+TEST_P(SimpleTcpSocketTest, MaxSegDefault) {
+ FileDescriptor s =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+
+ constexpr int kDefaultMSS = 536;
+ int tcp_max_seg;
+ socklen_t optlen = sizeof(tcp_max_seg);
+ ASSERT_THAT(
+ getsockopt(s.get(), IPPROTO_TCP, TCP_MAXSEG, &tcp_max_seg, &optlen),
+ SyscallSucceedsWithValue(0));
+
+ EXPECT_EQ(kDefaultMSS, tcp_max_seg);
+ EXPECT_EQ(sizeof(tcp_max_seg), optlen);
+}
+
+TEST_P(SimpleTcpSocketTest, SetMaxSeg) {
+ FileDescriptor s =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+
+ constexpr int kDefaultMSS = 536;
+ constexpr int kTCPMaxSeg = 1024;
+ ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_MAXSEG, &kTCPMaxSeg,
+ sizeof(kTCPMaxSeg)),
+ SyscallSucceedsWithValue(0));
+
+ // Linux actually never returns the user_mss value. It will always return the
+ // default MSS value defined above for an unconnected socket and always return
+ // the actual current MSS for a connected one.
+ int optval;
+ socklen_t optlen = sizeof(optval);
+ ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_MAXSEG, &optval, &optlen),
+ SyscallSucceedsWithValue(0));
+
+ EXPECT_EQ(kDefaultMSS, optval);
+ EXPECT_EQ(sizeof(optval), optlen);
+}
+
+TEST_P(SimpleTcpSocketTest, SetMaxSegFailsForInvalidMSSValues) {
+ FileDescriptor s =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+
+ {
+ constexpr int tcp_max_seg = 10;
+ ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_MAXSEG, &tcp_max_seg,
+ sizeof(tcp_max_seg)),
+ SyscallFailsWithErrno(EINVAL));
+ }
+ {
+ constexpr int tcp_max_seg = 75000;
+ ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_MAXSEG, &tcp_max_seg,
+ sizeof(tcp_max_seg)),
+ SyscallFailsWithErrno(EINVAL));
+ }
+}
+
INSTANTIATE_TEST_SUITE_P(AllInetTests, SimpleTcpSocketTest,
::testing::Values(AF_INET, AF_INET6));
diff --git a/test/syscalls/linux/udp_socket.cc b/test/syscalls/linux/udp_socket.cc
index 31db8a2ad..1bb0307c4 100644
--- a/test/syscalls/linux/udp_socket.cc
+++ b/test/syscalls/linux/udp_socket.cc
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+#include <arpa/inet.h>
#include <fcntl.h>
#include <linux/errqueue.h>
#include <netinet/in.h>
@@ -304,12 +305,50 @@ TEST_P(UdpSocketTest, ReceiveAfterConnect) {
SyscallSucceedsWithValue(sizeof(buf)));
// Receive the data.
- char received[512];
+ char received[sizeof(buf)];
EXPECT_THAT(recv(s_, received, sizeof(received), 0),
SyscallSucceedsWithValue(sizeof(received)));
EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0);
}
+TEST_P(UdpSocketTest, ReceiveAfterDisconnect) {
+ // Connect s_ to loopback:TestPort, and bind t_ to loopback:TestPort.
+ ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds());
+ ASSERT_THAT(bind(t_, addr_[0], addrlen_), SyscallSucceeds());
+ ASSERT_THAT(connect(t_, addr_[1], addrlen_), SyscallSucceeds());
+
+ // Get the address s_ was bound to during connect.
+ struct sockaddr_storage addr;
+ socklen_t addrlen = sizeof(addr);
+ EXPECT_THAT(getsockname(s_, reinterpret_cast<sockaddr*>(&addr), &addrlen),
+ SyscallSucceeds());
+ EXPECT_EQ(addrlen, addrlen_);
+
+ for (int i = 0; i < 2; i++) {
+ // Send from t_ to s_.
+ char buf[512];
+ RandomizeBuffer(buf, sizeof(buf));
+ EXPECT_THAT(getsockname(s_, reinterpret_cast<sockaddr*>(&addr), &addrlen),
+ SyscallSucceeds());
+ ASSERT_THAT(sendto(t_, buf, sizeof(buf), 0,
+ reinterpret_cast<sockaddr*>(&addr), addrlen),
+ SyscallSucceedsWithValue(sizeof(buf)));
+
+ // Receive the data.
+ char received[sizeof(buf)];
+ EXPECT_THAT(recv(s_, received, sizeof(received), 0),
+ SyscallSucceedsWithValue(sizeof(received)));
+ EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0);
+
+ // Disconnect s_.
+ struct sockaddr addr = {};
+ addr.sa_family = AF_UNSPEC;
+ ASSERT_THAT(connect(s_, &addr, sizeof(addr.sa_family)), SyscallSucceeds());
+ // Connect s_ loopback:TestPort.
+ ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds());
+ }
+}
+
TEST_P(UdpSocketTest, Connect) {
ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds());
@@ -335,6 +374,112 @@ TEST_P(UdpSocketTest, Connect) {
EXPECT_EQ(memcmp(&peer, addr_[2], addrlen_), 0);
}
+TEST_P(UdpSocketTest, DisconnectAfterBind) {
+ ASSERT_THAT(bind(s_, addr_[1], addrlen_), SyscallSucceeds());
+ // Connect the socket.
+ ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds());
+
+ struct sockaddr_storage addr = {};
+ addr.ss_family = AF_UNSPEC;
+ EXPECT_THAT(
+ connect(s_, reinterpret_cast<sockaddr*>(&addr), sizeof(addr.ss_family)),
+ SyscallSucceeds());
+
+ // Check that we're still bound.
+ socklen_t addrlen = sizeof(addr);
+ EXPECT_THAT(getsockname(s_, reinterpret_cast<sockaddr*>(&addr), &addrlen),
+ SyscallSucceeds());
+
+ EXPECT_EQ(addrlen, addrlen_);
+ EXPECT_EQ(memcmp(&addr, addr_[1], addrlen_), 0);
+
+ addrlen = sizeof(addr);
+ EXPECT_THAT(getpeername(s_, reinterpret_cast<sockaddr*>(&addr), &addrlen),
+ SyscallFailsWithErrno(ENOTCONN));
+}
+
+TEST_P(UdpSocketTest, DisconnectAfterBindToAny) {
+ struct sockaddr_storage baddr = {};
+ socklen_t addrlen;
+ auto port = *Port(reinterpret_cast<struct sockaddr_storage*>(addr_[1]));
+ if (addr_[0]->sa_family == AF_INET) {
+ auto addr_in = reinterpret_cast<struct sockaddr_in*>(&baddr);
+ addr_in->sin_family = AF_INET;
+ addr_in->sin_port = port;
+ inet_pton(AF_INET, "0.0.0.0",
+ reinterpret_cast<void*>(&addr_in->sin_addr.s_addr));
+ } else {
+ auto addr_in = reinterpret_cast<struct sockaddr_in6*>(&baddr);
+ addr_in->sin6_family = AF_INET6;
+ addr_in->sin6_port = port;
+ inet_pton(AF_INET6,
+ "::", reinterpret_cast<void*>(&addr_in->sin6_addr.s6_addr));
+ addr_in->sin6_scope_id = 0;
+ }
+ ASSERT_THAT(bind(s_, reinterpret_cast<sockaddr*>(&baddr), addrlen_),
+ SyscallSucceeds());
+ // Connect the socket.
+ ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds());
+
+ struct sockaddr_storage addr = {};
+ addr.ss_family = AF_UNSPEC;
+ EXPECT_THAT(
+ connect(s_, reinterpret_cast<sockaddr*>(&addr), sizeof(addr.ss_family)),
+ SyscallSucceeds());
+
+ // Check that we're still bound.
+ addrlen = sizeof(addr);
+ EXPECT_THAT(getsockname(s_, reinterpret_cast<sockaddr*>(&addr), &addrlen),
+ SyscallSucceeds());
+
+ EXPECT_EQ(addrlen, addrlen_);
+ EXPECT_EQ(memcmp(&addr, &baddr, addrlen), 0);
+
+ addrlen = sizeof(addr);
+ EXPECT_THAT(getpeername(s_, reinterpret_cast<sockaddr*>(&addr), &addrlen),
+ SyscallFailsWithErrno(ENOTCONN));
+}
+
+TEST_P(UdpSocketTest, Disconnect) {
+ for (int i = 0; i < 2; i++) {
+ // Try to connect again.
+ EXPECT_THAT(connect(s_, addr_[2], addrlen_), SyscallSucceeds());
+
+ // Check that we're connected to the right peer.
+ struct sockaddr_storage peer;
+ socklen_t peerlen = sizeof(peer);
+ EXPECT_THAT(getpeername(s_, reinterpret_cast<sockaddr*>(&peer), &peerlen),
+ SyscallSucceeds());
+ EXPECT_EQ(peerlen, addrlen_);
+ EXPECT_EQ(memcmp(&peer, addr_[2], addrlen_), 0);
+
+ // Try to disconnect.
+ struct sockaddr_storage addr = {};
+ addr.ss_family = AF_UNSPEC;
+ EXPECT_THAT(
+ connect(s_, reinterpret_cast<sockaddr*>(&addr), sizeof(addr.ss_family)),
+ SyscallSucceeds());
+
+ peerlen = sizeof(peer);
+ EXPECT_THAT(getpeername(s_, reinterpret_cast<sockaddr*>(&peer), &peerlen),
+ SyscallFailsWithErrno(ENOTCONN));
+
+ // Check that we're still bound.
+ socklen_t addrlen = sizeof(addr);
+ EXPECT_THAT(getsockname(s_, reinterpret_cast<sockaddr*>(&addr), &addrlen),
+ SyscallSucceeds());
+ EXPECT_EQ(addrlen, addrlen_);
+ EXPECT_EQ(*Port(&addr), 0);
+ }
+}
+
+TEST_P(UdpSocketTest, ConnectBadAddress) {
+ struct sockaddr addr = {};
+ addr.sa_family = addr_[0]->sa_family;
+ ASSERT_THAT(connect(s_, &addr, sizeof(addr.sa_family)),
+ SyscallFailsWithErrno(EINVAL));
+}
+
TEST_P(UdpSocketTest, SendToAddressOtherThanConnected) {
ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds());
@@ -397,7 +542,7 @@ TEST_P(UdpSocketTest, SendAndReceiveNotConnected) {
SyscallSucceedsWithValue(sizeof(buf)));
// Receive the data.
- char received[512];
+ char received[sizeof(buf)];
EXPECT_THAT(recv(s_, received, sizeof(received), 0),
SyscallSucceedsWithValue(sizeof(received)));
EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0);
@@ -419,7 +564,7 @@ TEST_P(UdpSocketTest, SendAndReceiveConnected) {
SyscallSucceedsWithValue(sizeof(buf)));
// Receive the data.
- char received[512];
+ char received[sizeof(buf)];
EXPECT_THAT(recv(s_, received, sizeof(received), 0),
SyscallSucceedsWithValue(sizeof(received)));
EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0);
@@ -462,7 +607,7 @@ TEST_P(UdpSocketTest, ReceiveBeforeConnect) {
ASSERT_THAT(connect(s_, addr_[1], addrlen_), SyscallSucceeds());
// Receive the data. It works because it was sent before the connect.
- char received[512];
+ char received[sizeof(buf)];
EXPECT_THAT(recv(s_, received, sizeof(received), 0),
SyscallSucceedsWithValue(sizeof(received)));
EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0);
@@ -491,7 +636,7 @@ TEST_P(UdpSocketTest, ReceiveFrom) {
SyscallSucceedsWithValue(sizeof(buf)));
// Receive the data and sender address.
- char received[512];
+ char received[sizeof(buf)];
struct sockaddr_storage addr;
socklen_t addrlen = sizeof(addr);
EXPECT_THAT(recvfrom(s_, received, sizeof(received), 0,
diff --git a/test/syscalls/linux/unix_domain_socket_test_util.cc b/test/syscalls/linux/unix_domain_socket_test_util.cc
index 6f49e3660..7fb9eed8d 100644
--- a/test/syscalls/linux/unix_domain_socket_test_util.cc
+++ b/test/syscalls/linux/unix_domain_socket_test_util.cc
@@ -47,38 +47,40 @@ std::string DescribeUnixDomainSocketType(int type) {
}
SocketPairKind UnixDomainSocketPair(int type) {
- return SocketPairKind{DescribeUnixDomainSocketType(type),
+ return SocketPairKind{DescribeUnixDomainSocketType(type), AF_UNIX, type, 0,
SyscallSocketPairCreator(AF_UNIX, type, 0)};
}
SocketPairKind FilesystemBoundUnixDomainSocketPair(int type) {
std::string description = absl::StrCat(DescribeUnixDomainSocketType(type),
- " created with filesystem binding");
+ " created with filesystem binding");
if ((type & SOCK_DGRAM) == SOCK_DGRAM) {
return SocketPairKind{
- description,
+ description, AF_UNIX, type, 0,
FilesystemBidirectionalBindSocketPairCreator(AF_UNIX, type, 0)};
}
return SocketPairKind{
- description, FilesystemAcceptBindSocketPairCreator(AF_UNIX, type, 0)};
+ description, AF_UNIX, type, 0,
+ FilesystemAcceptBindSocketPairCreator(AF_UNIX, type, 0)};
}
SocketPairKind AbstractBoundUnixDomainSocketPair(int type) {
- std::string description = absl::StrCat(DescribeUnixDomainSocketType(type),
- " created with abstract namespace binding");
+ std::string description =
+ absl::StrCat(DescribeUnixDomainSocketType(type),
+ " created with abstract namespace binding");
if ((type & SOCK_DGRAM) == SOCK_DGRAM) {
return SocketPairKind{
- description,
+ description, AF_UNIX, type, 0,
AbstractBidirectionalBindSocketPairCreator(AF_UNIX, type, 0)};
}
- return SocketPairKind{description,
+ return SocketPairKind{description, AF_UNIX, type, 0,
AbstractAcceptBindSocketPairCreator(AF_UNIX, type, 0)};
}
SocketPairKind SocketpairGoferUnixDomainSocketPair(int type) {
std::string description = absl::StrCat(DescribeUnixDomainSocketType(type),
- " created with the socketpair gofer");
- return SocketPairKind{description,
+ " created with the socketpair gofer");
+ return SocketPairKind{description, AF_UNIX, type, 0,
SocketpairGoferSocketPairCreator(AF_UNIX, type, 0)};
}
@@ -87,13 +89,15 @@ SocketPairKind SocketpairGoferFileSocketPair(int type) {
absl::StrCat(((type & O_NONBLOCK) != 0) ? "non-blocking " : "",
((type & O_CLOEXEC) != 0) ? "close-on-exec " : "",
"file socket created with the socketpair gofer");
- return SocketPairKind{description,
+ // The socketpair gofer always creates SOCK_STREAM sockets on open(2).
+ return SocketPairKind{description, AF_UNIX, SOCK_STREAM, 0,
SocketpairGoferFileSocketPairCreator(type)};
}
SocketPairKind FilesystemUnboundUnixDomainSocketPair(int type) {
return SocketPairKind{absl::StrCat(DescribeUnixDomainSocketType(type),
" unbound with a filesystem address"),
+ AF_UNIX, type, 0,
FilesystemUnboundSocketPairCreator(AF_UNIX, type, 0)};
}
@@ -101,7 +105,7 @@ SocketPairKind AbstractUnboundUnixDomainSocketPair(int type) {
return SocketPairKind{
absl::StrCat(DescribeUnixDomainSocketType(type),
" unbound with an abstract namespace address"),
- AbstractUnboundSocketPairCreator(AF_UNIX, type, 0)};
+ AF_UNIX, type, 0, AbstractUnboundSocketPairCreator(AF_UNIX, type, 0)};
}
void SendSingleFD(int sock, int fd, char buf[], int buf_size) {
diff --git a/test/syscalls/linux/unix_domain_socket_test_util.h b/test/syscalls/linux/unix_domain_socket_test_util.h
index aae990245..5eca0b7f0 100644
--- a/test/syscalls/linux/unix_domain_socket_test_util.h
+++ b/test/syscalls/linux/unix_domain_socket_test_util.h
@@ -21,7 +21,7 @@
namespace gvisor {
namespace testing {
-// DescribeUnixDomainSocketType returns a human-readable std::string explaining the
+// DescribeUnixDomainSocketType returns a human-readable string explaining the
// given Unix domain socket type.
std::string DescribeUnixDomainSocketType(int type);
@@ -40,7 +40,7 @@ SocketPairKind FilesystemBoundUnixDomainSocketPair(int type);
SocketPairKind AbstractBoundUnixDomainSocketPair(int type);
// SocketpairGoferUnixDomainSocketPair returns a SocketPairKind that was created
-// with two sockets conected to the socketpair gofer.
+// with two sockets connected to the socketpair gofer.
SocketPairKind SocketpairGoferUnixDomainSocketPair(int type);
// SocketpairGoferFileSocketPair returns a SocketPairKind that was created with
diff --git a/test/syscalls/syscall_test_runner.go b/test/syscalls/syscall_test_runner.go
index 476248184..5936d66ff 100644
--- a/test/syscalls/syscall_test_runner.go
+++ b/test/syscalls/syscall_test_runner.go
@@ -31,10 +31,10 @@ import (
specs "github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/sys/unix"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/runsc/specutils"
- "gvisor.googlesource.com/gvisor/runsc/test/testutil"
- "gvisor.googlesource.com/gvisor/test/syscalls/gtest"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/runsc/specutils"
+ "gvisor.dev/gvisor/runsc/test/testutil"
+ "gvisor.dev/gvisor/test/syscalls/gtest"
)
// Location of syscall tests, relative to the repo root.
diff --git a/test/util/capability_util.h b/test/util/capability_util.h
index e968a2583..bb9ea1fe5 100644
--- a/test/util/capability_util.h
+++ b/test/util/capability_util.h
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-// Utilities for testing capabilties.
+// Utilities for testing capabilities.
#ifndef GVISOR_TEST_UTIL_CAPABILITY_UTIL_H_
#define GVISOR_TEST_UTIL_CAPABILITY_UTIL_H_
diff --git a/test/util/fs_util.cc b/test/util/fs_util.cc
index bc90bd78e..ae49725a0 100644
--- a/test/util/fs_util.cc
+++ b/test/util/fs_util.cc
@@ -290,7 +290,7 @@ PosixError WalkTree(
}
PosixErrorOr<std::vector<std::string>> ListDir(absl::string_view abspath,
- bool skipdots) {
+ bool skipdots) {
std::vector<std::string> files;
DIR* dir = opendir(std::string(abspath).c_str());
@@ -381,7 +381,7 @@ PosixError RecursivelyCreateDir(absl::string_view path) {
// Makes a path absolute with respect to an optional base. If no base is
// provided it will use the current working directory.
PosixErrorOr<std::string> MakeAbsolute(absl::string_view filename,
- absl::string_view base) {
+ absl::string_view base) {
if (filename.empty()) {
return PosixError(EINVAL, "filename cannot be empty.");
}
@@ -494,7 +494,7 @@ std::string CleanPath(const absl::string_view unclean_path) {
}
PosixErrorOr<std::string> GetRelativePath(absl::string_view source,
- absl::string_view dest) {
+ absl::string_view dest) {
if (!absl::StartsWith(source, "/") || !absl::StartsWith(dest, "/")) {
// At least one of the inputs is not an absolute path.
return PosixError(
diff --git a/test/util/fs_util.h b/test/util/fs_util.h
index eb7cdaa24..3969f8309 100644
--- a/test/util/fs_util.h
+++ b/test/util/fs_util.h
@@ -61,7 +61,7 @@ PosixError SetContents(absl::string_view path, absl::string_view contents);
PosixError CreateWithContents(absl::string_view path,
absl::string_view contents, int mode = 0666);
-// Attempts to read the entire contents of the file into the provided std::string
+// Attempts to read the entire contents of the file into the provided string
// buffer or returns an error.
PosixError GetContents(absl::string_view path, std::string* output);
@@ -69,7 +69,7 @@ PosixError GetContents(absl::string_view path, std::string* output);
PosixErrorOr<std::string> GetContents(absl::string_view path);
// Attempts to read the entire contents of the provided fd into the provided
-// std::string or returns an error.
+// string or returns an error.
PosixError GetContentsFD(int fd, std::string* output);
// Attempts to read the entire contents of the provided fd or returns an error.
@@ -94,7 +94,7 @@ PosixError WalkTree(
// method does not walk the tree recursively it only returns the elements
// in that directory.
PosixErrorOr<std::vector<std::string>> ListDir(absl::string_view abspath,
- bool skipdots);
+ bool skipdots);
// Attempt to recursively delete a directory or file. Returns an error and
// the number of undeleted directories and files. If either
@@ -108,20 +108,20 @@ PosixError RecursivelyCreateDir(absl::string_view path);
// Makes a path absolute with respect to an optional base. If no base is
// provided it will use the current working directory.
PosixErrorOr<std::string> MakeAbsolute(absl::string_view filename,
- absl::string_view base);
+ absl::string_view base);
// Generates a relative path from the source directory to the destination
// (dest) file or directory. This uses ../ when necessary for destinations
// which are not nested within the source. Both source and dest are required
-// to be absolute paths, and an empty std::string will be returned if they are not.
+// to be absolute paths, and an empty string will be returned if they are not.
PosixErrorOr<std::string> GetRelativePath(absl::string_view source,
- absl::string_view dest);
+ absl::string_view dest);
// Returns the part of the path before the final "/", EXCEPT:
// * If there is a single leading "/" in the path, the result will be the
// leading "/".
// * If there is no "/" in the path, the result is the empty prefix of the
-// input std::string.
+// input string.
absl::string_view Dirname(absl::string_view path);
// Return the parts of the path, split on the final "/". If there is no
@@ -135,7 +135,7 @@ std::pair<absl::string_view, absl::string_view> SplitPath(
// "/" in the path, the result is the same as the input.
// Note that this function's behavior differs from the Unix basename
// command if path ends with "/". For such paths, this function returns the
-// empty std::string.
+// empty string.
absl::string_view Basename(absl::string_view path);
// Collapse duplicate "/"s, resolve ".." and "." path elements, remove
@@ -144,7 +144,7 @@ absl::string_view Basename(absl::string_view path);
// NOTE: This respects relative vs. absolute paths, but does not
// invoke any system calls (getcwd(2)) in order to resolve relative
// paths wrt actual working directory. That is, this is purely a
-// std::string manipulation, completely independent of process state.
+// string manipulation, completely independent of process state.
std::string CleanPath(absl::string_view path);
// Returns the full path to the executable of the given pid or a PosixError.
@@ -174,7 +174,7 @@ inline std::string JoinPath(absl::string_view path) {
std::string JoinPath(absl::string_view path1, absl::string_view path2);
template <typename... T>
inline std::string JoinPath(absl::string_view path1, absl::string_view path2,
- absl::string_view path3, const T&... args) {
+ absl::string_view path3, const T&... args) {
return internal::JoinPathImpl({path1, path2, path3, args...});
}
} // namespace testing
diff --git a/test/util/fs_util_test.cc b/test/util/fs_util_test.cc
index 4e12076a1..2a200320a 100644
--- a/test/util/fs_util_test.cc
+++ b/test/util/fs_util_test.cc
@@ -29,7 +29,8 @@ namespace {
TEST(FsUtilTest, RecursivelyCreateDirManualDelete) {
const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
- const std::string base_path = JoinPath(root.path(), "/a/b/c/d/e/f/g/h/i/j/k/l/m");
+ const std::string base_path =
+ JoinPath(root.path(), "/a/b/c/d/e/f/g/h/i/j/k/l/m");
ASSERT_THAT(Exists(base_path), IsPosixErrorOkAndHolds(false));
ASSERT_NO_ERRNO(RecursivelyCreateDir(base_path));
@@ -48,7 +49,8 @@ TEST(FsUtilTest, RecursivelyCreateDirManualDelete) {
TEST(FsUtilTest, RecursivelyCreateAndDeleteDir) {
const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
- const std::string base_path = JoinPath(root.path(), "/a/b/c/d/e/f/g/h/i/j/k/l/m");
+ const std::string base_path =
+ JoinPath(root.path(), "/a/b/c/d/e/f/g/h/i/j/k/l/m");
ASSERT_THAT(Exists(base_path), IsPosixErrorOkAndHolds(false));
ASSERT_NO_ERRNO(RecursivelyCreateDir(base_path));
@@ -60,7 +62,8 @@ TEST(FsUtilTest, RecursivelyCreateAndDeleteDir) {
TEST(FsUtilTest, RecursivelyCreateAndDeletePartial) {
const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
- const std::string base_path = JoinPath(root.path(), "/a/b/c/d/e/f/g/h/i/j/k/l/m");
+ const std::string base_path =
+ JoinPath(root.path(), "/a/b/c/d/e/f/g/h/i/j/k/l/m");
ASSERT_THAT(Exists(base_path), IsPosixErrorOkAndHolds(false));
ASSERT_NO_ERRNO(RecursivelyCreateDir(base_path));
diff --git a/test/util/logging.cc b/test/util/logging.cc
index cc71d77b0..5d5e76c46 100644
--- a/test/util/logging.cc
+++ b/test/util/logging.cc
@@ -50,7 +50,7 @@ int WriteNumber(int fd, uint32_t val) {
constexpr int kBufferSize = 11;
char buf[kBufferSize];
- // Convert the number to std::string.
+ // Convert the number to string.
char* s = buf + sizeof(buf) - 1;
size_t size = 0;
diff --git a/test/util/memory_util.h b/test/util/memory_util.h
index 8c77778ea..190c469b5 100644
--- a/test/util/memory_util.h
+++ b/test/util/memory_util.h
@@ -118,6 +118,17 @@ inline PosixErrorOr<Mapping> MmapAnon(size_t length, int prot, int flags) {
return Mmap(nullptr, length, prot, flags | MAP_ANONYMOUS, -1, 0);
}
+// Returns true if the page containing addr is mapped.
+inline bool IsMapped(uintptr_t addr) {
+ int const rv = msync(reinterpret_cast<void*>(addr & ~(kPageSize - 1)),
+ kPageSize, MS_ASYNC);
+ if (rv == 0) {
+ return true;
+ }
+ TEST_PCHECK_MSG(errno == ENOMEM, "msync failed with unexpected errno");
+ return false;
+}
+
} // namespace testing
} // namespace gvisor
diff --git a/test/util/mount_util.h b/test/util/mount_util.h
index 7782e6bf2..38ec6c8a1 100644
--- a/test/util/mount_util.h
+++ b/test/util/mount_util.h
@@ -30,9 +30,11 @@ namespace testing {
// Mount mounts the filesystem, and unmounts when the returned reference is
// destroyed.
-inline PosixErrorOr<Cleanup> Mount(const std::string &source, const std::string &target,
+inline PosixErrorOr<Cleanup> Mount(const std::string &source,
+ const std::string &target,
const std::string &fstype, uint64_t mountflags,
- const std::string &data, uint64_t umountflags) {
+ const std::string &data,
+ uint64_t umountflags) {
if (mount(source.c_str(), target.c_str(), fstype.c_str(), mountflags,
data.c_str()) == -1) {
return PosixError(errno, "mount failed");
diff --git a/test/util/posix_error.h b/test/util/posix_error.h
index b604f4f8f..ad666bce0 100644
--- a/test/util/posix_error.h
+++ b/test/util/posix_error.h
@@ -51,7 +51,7 @@ class ABSL_MUST_USE_RESULT PosixError {
std::string error_message() const { return msg_; }
- // ToString produces a full std::string representation of this posix error
+ // ToString produces a full string representation of this posix error
// including the printable representation of the errno and the error message.
std::string ToString() const;
diff --git a/test/util/proc_util.cc b/test/util/proc_util.cc
index 9d4db37c3..75b24da37 100644
--- a/test/util/proc_util.cc
+++ b/test/util/proc_util.cc
@@ -76,7 +76,8 @@ PosixErrorOr<ProcMapsEntry> ParseProcMapsLine(absl::string_view line) {
if (parts.size() == 6) {
// A filename is present. However, absl::StrSplit retained the whitespace
// between the inode number and the filename.
- map_entry.filename = std::string(absl::StripLeadingAsciiWhitespace(parts[5]));
+ map_entry.filename =
+ std::string(absl::StripLeadingAsciiWhitespace(parts[5]));
}
return map_entry;
diff --git a/test/util/temp_path.cc b/test/util/temp_path.cc
index de7c04a6f..35aacb172 100644
--- a/test/util/temp_path.cc
+++ b/test/util/temp_path.cc
@@ -70,13 +70,16 @@ std::string NewTempAbsPathInDir(absl::string_view const dir) {
return JoinPath(dir, NextTempBasename());
}
-std::string NewTempAbsPath() { return NewTempAbsPathInDir(GetAbsoluteTestTmpdir()); }
+std::string NewTempAbsPath() {
+ return NewTempAbsPathInDir(GetAbsoluteTestTmpdir());
+}
std::string NewTempRelPath() { return NextTempBasename(); }
std::string GetAbsoluteTestTmpdir() {
char* env_tmpdir = getenv("TEST_TMPDIR");
- std::string tmp_dir = env_tmpdir != nullptr ? std::string(env_tmpdir) : "/tmp";
+ std::string tmp_dir =
+ env_tmpdir != nullptr ? std::string(env_tmpdir) : "/tmp";
return MakeAbsolute(tmp_dir, "").ValueOrDie();
}
diff --git a/test/util/temp_path.h b/test/util/temp_path.h
index 89302e0fd..92d669503 100644
--- a/test/util/temp_path.h
+++ b/test/util/temp_path.h
@@ -30,7 +30,7 @@ namespace testing {
// Distinct calls to NewTempAbsPathInDir from the same process, even from
// multiple threads, are guaranteed to return different paths. Distinct calls to
// NewTempAbsPathInDir from different processes are not synchronized.
-std::string NewTempAbsPathInDir(absl::string_view base);
+std::string NewTempAbsPathInDir(absl::string_view const dir);
// Like NewTempAbsPathInDir, but the returned path is in the test's temporary
// directory, as provided by the testing framework.
@@ -105,7 +105,7 @@ class TempPath {
// Changes the path this TempPath represents. If the TempPath already
// represented a path, deletes and returns that path. Otherwise returns the
- // empty std::string.
+ // empty string.
std::string reset(std::string newpath);
std::string reset() { return reset(""); }
diff --git a/test/util/test_util.cc b/test/util/test_util.cc
index bf0029951..e42bba04a 100644
--- a/test/util/test_util.cc
+++ b/test/util/test_util.cc
@@ -73,7 +73,7 @@ Platform GvisorPlatform() {
CPUVendor GetCPUVendor() {
uint32_t eax, ebx, ecx, edx;
std::string vendor_str;
- // Get vendor std::string (issue CPUID with eax = 0)
+ // Get vendor string (issue CPUID with eax = 0)
GETCPUID(eax, ebx, ecx, edx, 0, 0);
vendor_str.append(reinterpret_cast<char*>(&ebx), 4);
vendor_str.append(reinterpret_cast<char*>(&edx), 4);
@@ -93,7 +93,8 @@ bool operator==(const KernelVersion& first, const KernelVersion& second) {
PosixErrorOr<KernelVersion> ParseKernelVersion(absl::string_view vers_str) {
KernelVersion version = {};
- std::vector<std::string> values = absl::StrSplit(vers_str, absl::ByAnyChar(".-"));
+ std::vector<std::string> values =
+ absl::StrSplit(vers_str, absl::ByAnyChar(".-"));
if (values.size() == 2) {
ASSIGN_OR_RETURN_ERRNO(version.major, Atoi<int>(values[0]));
ASSIGN_OR_RETURN_ERRNO(version.minor, Atoi<int>(values[1]));
diff --git a/test/util/timer_util.h b/test/util/timer_util.h
index 2cebfa5d1..31aea4fc6 100644
--- a/test/util/timer_util.h
+++ b/test/util/timer_util.h
@@ -30,7 +30,7 @@
namespace gvisor {
namespace testing {
-// MonotonicTimer is a simple timer that uses a monotic clock.
+// MonotonicTimer is a simple timer that uses a monotonic clock.
class MonotonicTimer {
public:
MonotonicTimer() {}
diff --git a/third_party/gvsync/BUILD b/third_party/gvsync/BUILD
index 04a1fbeba..8dab51daa 100644
--- a/third_party/gvsync/BUILD
+++ b/third_party/gvsync/BUILD
@@ -40,7 +40,7 @@ go_library(
"race_unsafe.go",
"seqcount.go",
],
- importpath = "gvisor.googlesource.com/gvisor/third_party/gvsync",
+ importpath = "gvisor.dev/gvisor/third_party/gvsync",
)
go_test(
diff --git a/third_party/gvsync/atomicptr_unsafe.go b/third_party/gvsync/atomicptr_unsafe.go
index 53a943282..525c4beed 100644
--- a/third_party/gvsync/atomicptr_unsafe.go
+++ b/third_party/gvsync/atomicptr_unsafe.go
@@ -21,8 +21,18 @@ type Value struct{}
// Note that copying AtomicPtr by value performs a non-atomic read of the
// stored pointer, which is unsafe if Store() can be called concurrently; in
// this case, do `dst.Store(src.Load())` instead.
+//
+// +stateify savable
type AtomicPtr struct {
- ptr unsafe.Pointer
+ ptr unsafe.Pointer `state:".(*Value)"`
+}
+
+func (p *AtomicPtr) savePtr() *Value {
+ return p.Load()
+}
+
+func (p *AtomicPtr) loadPtr(v *Value) {
+ p.Store(v)
}
// Load returns the value set by the most recent Store. It returns nil if there
diff --git a/third_party/gvsync/atomicptrtest/BUILD b/third_party/gvsync/atomicptrtest/BUILD
index 74c51fd18..6cf69ea91 100644
--- a/third_party/gvsync/atomicptrtest/BUILD
+++ b/third_party/gvsync/atomicptrtest/BUILD
@@ -6,7 +6,7 @@ load("//tools/go_generics:defs.bzl", "go_template_instance")
go_template_instance(
name = "atomicptr_int",
- out = "atomicptr_int.go",
+ out = "atomicptr_int_unsafe.go",
package = "atomicptr",
suffix = "Int",
template = "//third_party/gvsync:generic_atomicptr",
@@ -17,8 +17,8 @@ go_template_instance(
go_library(
name = "atomicptr",
- srcs = ["atomicptr_int.go"],
- importpath = "gvisor.googlesource.com/gvisor/third_party/gvsync/atomicptr",
+ srcs = ["atomicptr_int_unsafe.go"],
+ importpath = "gvisor.dev/gvisor/third_party/gvsync/atomicptr",
)
go_test(
diff --git a/third_party/gvsync/seqatomic_unsafe.go b/third_party/gvsync/seqatomic_unsafe.go
index c52d378f1..382eeed43 100644
--- a/third_party/gvsync/seqatomic_unsafe.go
+++ b/third_party/gvsync/seqatomic_unsafe.go
@@ -13,7 +13,7 @@ import (
"strings"
"unsafe"
- "gvisor.googlesource.com/gvisor/third_party/gvsync"
+ "gvisor.dev/gvisor/third_party/gvsync"
)
// Value is a required type parameter.
diff --git a/third_party/gvsync/seqatomictest/BUILD b/third_party/gvsync/seqatomictest/BUILD
index d83149e81..9e87e0bc5 100644
--- a/third_party/gvsync/seqatomictest/BUILD
+++ b/third_party/gvsync/seqatomictest/BUILD
@@ -6,7 +6,7 @@ load("//tools/go_generics:defs.bzl", "go_template_instance")
go_template_instance(
name = "seqatomic_int",
- out = "seqatomic_int.go",
+ out = "seqatomic_int_unsafe.go",
package = "seqatomic",
suffix = "Int",
template = "//third_party/gvsync:generic_seqatomic",
@@ -17,8 +17,8 @@ go_template_instance(
go_library(
name = "seqatomic",
- srcs = ["seqatomic_int.go"],
- importpath = "gvisor.googlesource.com/gvisor/third_party/gvsync/seqatomic",
+ srcs = ["seqatomic_int_unsafe.go"],
+ importpath = "gvisor.dev/gvisor/third_party/gvsync/seqatomic",
deps = [
"//third_party/gvsync",
],
diff --git a/third_party/gvsync/seqatomictest/seqatomic_test.go b/third_party/gvsync/seqatomictest/seqatomic_test.go
index 2da73cf96..a5447f589 100644
--- a/third_party/gvsync/seqatomictest/seqatomic_test.go
+++ b/third_party/gvsync/seqatomictest/seqatomic_test.go
@@ -19,7 +19,7 @@ import (
"testing"
"time"
- "gvisor.googlesource.com/gvisor/third_party/gvsync"
+ "gvisor.dev/gvisor/third_party/gvsync"
)
func TestSeqAtomicLoadUncontended(t *testing.T) {
diff --git a/tools/checkunsafe/BUILD b/tools/checkunsafe/BUILD
new file mode 100644
index 000000000..d85c56131
--- /dev/null
+++ b/tools/checkunsafe/BUILD
@@ -0,0 +1,13 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_tool_library")
+
+package(licenses = ["notice"])
+
+go_tool_library(
+ name = "checkunsafe",
+ srcs = ["check_unsafe.go"],
+ importpath = "checkunsafe",
+ visibility = ["//visibility:public"],
+ deps = [
+ "@org_golang_x_tools//go/analysis:go_tool_library",
+ ],
+)
diff --git a/tools/checkunsafe/check_unsafe.go b/tools/checkunsafe/check_unsafe.go
new file mode 100644
index 000000000..4ccd7cc5a
--- /dev/null
+++ b/tools/checkunsafe/check_unsafe.go
@@ -0,0 +1,56 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package checkunsafe allows unsafe imports only in files named appropriately.
+package checkunsafe
+
+import (
+ "fmt"
+ "path"
+ "strconv"
+ "strings"
+
+ "golang.org/x/tools/go/analysis"
+)
+
+// Analyzer defines the entrypoint.
+var Analyzer = &analysis.Analyzer{
+ Name: "checkunsafe",
+ Doc: "allows unsafe use only in specified files",
+ Run: run,
+}
+
+func run(pass *analysis.Pass) (interface{}, error) {
+ for _, f := range pass.Files {
+ for _, imp := range f.Imports {
+ // Is this an unsafe import?
+ pkg, err := strconv.Unquote(imp.Path.Value)
+ if err != nil || pkg != "unsafe" {
+ continue
+ }
+
+ // Extract the filename.
+ filename := pass.Fset.File(imp.Pos()).Name()
+
+ // Allow files named _unsafe.go or _test.go to opt out.
+ if strings.HasSuffix(filename, "_unsafe.go") || strings.HasSuffix(filename, "_test.go") {
+ continue
+ }
+
+ // Throw the error.
+ pass.Reportf(imp.Pos(), fmt.Sprintf("package unsafe imported by %s; must end with _unsafe.go", path.Base(filename)))
+ }
+ }
+ return nil, nil
+}
diff --git a/tools/go_branch.sh b/tools/go_branch.sh
index 8ea6a6d8d..d9e79401d 100755
--- a/tools/go_branch.sh
+++ b/tools/go_branch.sh
@@ -19,6 +19,7 @@ set -eo pipefail
# Discovery the package name from the go.mod file.
declare -r gomod="$(pwd)/go.mod"
declare -r module=$(cat "${gomod}" | grep -E "^module" | cut -d' ' -f2)
+declare -r gosum="$(pwd)/go.sum"
# Check that gopath has been built.
declare -r gopath_dir="$(pwd)/bazel-bin/gopath/src/${module}"
@@ -63,6 +64,7 @@ git merge --allow-unrelated-histories --no-commit --strategy ours ${head}
# Sync the entire gopath_dir and go.mod.
rsync --recursive --verbose --delete --exclude .git --exclude README.md -L "${gopath_dir}/" .
cp "${gomod}" .
+cp "${gosum}" .
# There are a few solitary files that can get left behind due to the way bazel
# constructs the gopath target. Note that we don't find all Go files here
diff --git a/tools/go_generics/generics.go b/tools/go_generics/generics.go
index ca414d8cb..22c714c13 100644
--- a/tools/go_generics/generics.go
+++ b/tools/go_generics/generics.go
@@ -98,7 +98,7 @@ import (
"regexp"
"strings"
- "gvisor.googlesource.com/gvisor/tools/go_generics/globals"
+ "gvisor.dev/gvisor/tools/go_generics/globals"
)
var (
@@ -222,7 +222,11 @@ func main() {
// Modify the state tag appropriately.
if m := stateTagRegexp.FindStringSubmatch(ident.Name); m != nil {
if t := identifierRegexp.FindStringSubmatch(m[2]); t != nil {
- ident.Name = m[1] + `state:".(` + t[1] + *prefix + t[2] + *suffix + t[3] + `)"` + m[3]
+ typeName := *prefix + t[2] + *suffix
+ if n, ok := types[t[2]]; ok {
+ typeName = n
+ }
+ ident.Name = m[1] + `state:".(` + t[1] + typeName + t[3] + `)"` + m[3]
}
}
}
diff --git a/tools/go_generics/globals/BUILD b/tools/go_generics/globals/BUILD
index 6628132f5..74853c7d2 100644
--- a/tools/go_generics/globals/BUILD
+++ b/tools/go_generics/globals/BUILD
@@ -8,6 +8,6 @@ go_library(
"globals_visitor.go",
"scope.go",
],
- importpath = "gvisor.googlesource.com/gvisor/tools/go_generics/globals",
+ importpath = "gvisor.dev/gvisor/tools/go_generics/globals",
visibility = ["//tools/go_generics:__pkg__"],
)
diff --git a/tools/go_generics/globals/globals_visitor.go b/tools/go_generics/globals/globals_visitor.go
index 7ae48c662..3f948637b 100644
--- a/tools/go_generics/globals/globals_visitor.go
+++ b/tools/go_generics/globals/globals_visitor.go
@@ -132,7 +132,7 @@ func (v *globalsVisitor) visitFields(l *ast.FieldList, kind SymKind) {
}
}
-// visitGenDecl is called when a generic declation is encountered, for example,
+// visitGenDecl is called when a generic declaration is encountered, for example,
// on variable, constant and type declarations. It adds all newly defined
// symbols to the current scope and reports them if the current scope is the
// global one.
@@ -490,7 +490,7 @@ func (v *globalsVisitor) visitBlockStmt(s *ast.BlockStmt) {
v.popScope()
}
-// visitFuncDecl is called when a function or method declation is encountered.
+// visitFuncDecl is called when a function or method declaration is encountered.
// it creates a new scope for the function [optional] receiver, parameters and
// results, and visits all children nodes.
func (v *globalsVisitor) visitFuncDecl(d *ast.FuncDecl) {
diff --git a/tools/go_generics/imports.go b/tools/go_generics/imports.go
index 3a7230c97..148dc7216 100644
--- a/tools/go_generics/imports.go
+++ b/tools/go_generics/imports.go
@@ -23,7 +23,7 @@ import (
"go/token"
"strconv"
- "gvisor.googlesource.com/gvisor/tools/go_generics/globals"
+ "gvisor.dev/gvisor/tools/go_generics/globals"
)
type importedPackage struct {
diff --git a/tools/go_stateify/defs.bzl b/tools/go_stateify/defs.bzl
index 70ce73d7b..aeba197e2 100644
--- a/tools/go_stateify/defs.bzl
+++ b/tools/go_stateify/defs.bzl
@@ -50,7 +50,7 @@ def _go_stateify_impl(ctx):
args += ["-imports=%s" % ",".join(ctx.attr.imports)]
args += ["--"]
for src in ctx.attr.srcs:
- args += [f.path for f in src.files]
+ args += [f.path for f in src.files.to_list()]
ctx.actions.run(
inputs = ctx.files.srcs,
outputs = [output],
@@ -76,7 +76,7 @@ go_stateify = rule(
"package": attr.string(mandatory = True),
"out": attr.output(mandatory = True),
"_tool": attr.label(executable = True, cfg = "host", default = Label("//tools/go_stateify:stateify")),
- "_statepkg": attr.string(default = "gvisor.googlesource.com/gvisor/pkg/state"),
+ "_statepkg": attr.string(default = "gvisor.dev/gvisor/pkg/state"),
},
)
diff --git a/tools/nogo.js b/tools/nogo.js
new file mode 100644
index 000000000..fc0a4d1f0
--- /dev/null
+++ b/tools/nogo.js
@@ -0,0 +1,7 @@
+{
+ "checkunsafe": {
+ "exclude_files": {
+ "/external/": "not subject to constraint"
+ }
+ }
+}
diff --git a/tools/run_tests.sh b/tools/run_tests.sh
index b35d2e4b8..483b9cb50 100755
--- a/tools/run_tests.sh
+++ b/tools/run_tests.sh
@@ -45,7 +45,8 @@ readonly TEST_PACKAGES=("//pkg/..." "//runsc/..." "//tools/...")
#######################
# Install the latest version of Bazel and log the version.
-(which use_bazel.sh && use_bazel.sh latest) || which bazel
+# FIXME(b/137285694): Unable to build runsc with bazel 0.28.0.
+(which use_bazel.sh && use_bazel.sh 0.27.1) || which bazel
bazel version
# Load the kvm module.
@@ -92,6 +93,14 @@ build_everything() {
"${BUILD_PACKAGES[@]}"
}
+build_runsc_debian() {
+ cd ${WORKSPACE_DIR}
+
+ # TODO(b/135475885): pkg_deb is incompatible with Python3.
+ # https://github.com/bazelbuild/bazel/issues/8443
+ bazel build --host_force_python=py2 runsc:runsc-debian
+}
+
# Run simple tests runs the tests that require no special setup or
# configuration.
run_simple_tests() {
@@ -173,16 +182,24 @@ run_docker_tests() {
# These names are used to exclude tests not supported in certain
# configuration, e.g. save/restore not supported with hostnet.
- declare -a variations=("" "-kvm" "-hostnet" "-overlay")
- for v in "${variations[@]}"; do
- # Run runsc tests with docker that are tagged manual.
- bazel test \
- "${BAZEL_BUILD_FLAGS[@]}" \
- --test_env=RUNSC_RUNTIME="${RUNTIME}${v}" \
- --test_output=all \
- //runsc/test/image:image_test \
- //runsc/test/integration:integration_test
- done
+ # Run runsc tests with docker that are tagged manual.
+ #
+ # The --nocache_test_results option is used here to eliminate cached results
+ # from the previous run for the runc runtime.
+ bazel test \
+ "${BAZEL_BUILD_FLAGS[@]}" \
+ --test_env=RUNSC_RUNTIME="${RUNTIME}" \
+ --test_output=all \
+ --nocache_test_results \
+ --test_output=streamed \
+ //runsc/test/integration:integration_test \
+ //runsc/test/integration:integration_test_hostnet \
+ //runsc/test/integration:integration_test_overlay \
+ //runsc/test/integration:integration_test_kvm \
+ //runsc/test/image:image_test \
+ //runsc/test/image:image_test_overlay \
+ //runsc/test/image:image_test_hostnet \
+ //runsc/test/image:image_test_kvm
}
# Run the tests that require root.
@@ -208,8 +225,8 @@ run_runsc_do_tests() {
local runsc=$(find bazel-bin/runsc -type f -executable -name "runsc" | head -n1)
# run runsc do without root privileges.
- unshare -Ur ${runsc} --network=none --TESTONLY-unsafe-nonroot do true
- unshare -Ur ${runsc} --TESTONLY-unsafe-nonroot --network=host do --netns=false true
+ ${runsc} --rootless do true
+ ${runsc} --rootless --network=none do true
# run runsc do with root privileges.
sudo -n -E ${runsc} do true
@@ -273,6 +290,8 @@ main() {
run_syscall_tests
run_runsc_do_tests
+ build_runsc_debian
+
# Build other flavors too.
build_everything dbg
diff --git a/vdso/check_vdso.py b/vdso/check_vdso.py
index e41b09709..b3ee574f3 100644
--- a/vdso/check_vdso.py
+++ b/vdso/check_vdso.py
@@ -30,7 +30,7 @@ def PageRoundDown(addr):
addr: An address.
Returns:
- The address rounded down to thie nearest page.
+ The address rounded down to the nearest page.
"""
return addr & ~(PAGE_SIZE - 1)