summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--.buildkite/pipeline.yaml3
-rw-r--r--Makefile11
-rw-r--r--g3doc/architecture_guide/platforms.md4
-rw-r--r--nogo.yaml2
-rw-r--r--pkg/abi/linux/fs.go3
-rw-r--r--pkg/abi/linux/ptrace_amd64.go5
-rw-r--r--pkg/abi/linux/ptrace_arm64.go5
-rw-r--r--pkg/coverage/BUILD2
-rw-r--r--pkg/coverage/coverage.go4
-rw-r--r--pkg/hostarch/BUILD42
-rw-r--r--pkg/hostarch/access_type.go (renamed from pkg/usermem/access_type.go)2
-rw-r--r--pkg/hostarch/addr.go (renamed from pkg/usermem/addr.go)4
-rw-r--r--pkg/hostarch/addr_range_seq_test.go (renamed from pkg/usermem/addr_range_seq_test.go)2
-rw-r--r--pkg/hostarch/addr_range_seq_unsafe.go (renamed from pkg/usermem/addr_range_seq_unsafe.go)2
-rw-r--r--pkg/hostarch/hostarch.go7
-rw-r--r--pkg/hostarch/hostarch_arm64.go (renamed from pkg/usermem/usermem_arm64.go)2
-rw-r--r--pkg/hostarch/hostarch_x86.go (renamed from pkg/usermem/usermem_x86.go)2
-rw-r--r--pkg/marshal/BUILD2
-rw-r--r--pkg/marshal/marshal.go20
-rw-r--r--pkg/marshal/marshal_impl_util.go8
-rw-r--r--pkg/marshal/primitive/BUILD1
-rw-r--r--pkg/marshal/primitive/primitive.go53
-rw-r--r--pkg/merkletree/BUILD4
-rw-r--r--pkg/merkletree/merkletree.go6
-rw-r--r--pkg/merkletree/merkletree_test.go171
-rw-r--r--pkg/ring0/BUILD2
-rw-r--r--pkg/ring0/defs_amd64.go4
-rw-r--r--pkg/ring0/defs_arm64.go4
-rw-r--r--pkg/ring0/gen_offsets/BUILD2
-rw-r--r--pkg/ring0/kernel_amd64.go16
-rw-r--r--pkg/ring0/lib_arm64.go3
-rw-r--r--pkg/ring0/pagetables/BUILD7
-rw-r--r--pkg/ring0/pagetables/allocator_unsafe.go10
-rw-r--r--pkg/ring0/pagetables/pagetables.go18
-rw-r--r--pkg/ring0/pagetables/pagetables_aarch64.go6
-rw-r--r--pkg/ring0/pagetables/pagetables_amd64_test.go34
-rw-r--r--pkg/ring0/pagetables/pagetables_arm64_test.go4
-rw-r--r--pkg/ring0/pagetables/pagetables_test.go35
-rw-r--r--pkg/ring0/pagetables/pagetables_x86.go6
-rw-r--r--pkg/safecopy/safecopy_test.go2
-rw-r--r--pkg/seccomp/BUILD2
-rw-r--r--pkg/seccomp/seccomp_test.go4
-rw-r--r--pkg/sentry/arch/BUILD1
-rw-r--r--pkg/sentry/arch/arch.go20
-rw-r--r--pkg/sentry/arch/arch_amd64.go32
-rw-r--r--pkg/sentry/arch/arch_arm64.go28
-rw-r--r--pkg/sentry/arch/auxv.go4
-rw-r--r--pkg/sentry/arch/fpu/BUILD2
-rw-r--r--pkg/sentry/arch/fpu/fpu_amd64.go16
-rw-r--r--pkg/sentry/arch/fpu/fpu_arm64.go2
-rw-r--r--pkg/sentry/arch/signal.go50
-rw-r--r--pkg/sentry/arch/signal_amd64.go8
-rw-r--r--pkg/sentry/arch/signal_arm64.go6
-rw-r--r--pkg/sentry/arch/signal_stack.go10
-rw-r--r--pkg/sentry/arch/stack.go44
-rw-r--r--pkg/sentry/arch/stack_unsafe.go6
-rw-r--r--pkg/sentry/devices/memdev/zero.go1
-rw-r--r--pkg/sentry/devices/tundev/BUILD1
-rw-r--r--pkg/sentry/devices/tundev/tundev.go5
-rw-r--r--pkg/sentry/fs/BUILD1
-rw-r--r--pkg/sentry/fs/anon/BUILD2
-rw-r--r--pkg/sentry/fs/anon/anon.go4
-rw-r--r--pkg/sentry/fs/copy_up.go3
-rw-r--r--pkg/sentry/fs/dev/BUILD1
-rw-r--r--pkg/sentry/fs/dev/dev.go11
-rw-r--r--pkg/sentry/fs/dev/net_tun.go5
-rw-r--r--pkg/sentry/fs/fdpipe/BUILD1
-rw-r--r--pkg/sentry/fs/fdpipe/pipe_test.go4
-rw-r--r--pkg/sentry/fs/fsutil/BUILD2
-rw-r--r--pkg/sentry/fs/fsutil/dirty_set.go4
-rw-r--r--pkg/sentry/fs/fsutil/dirty_set_test.go12
-rw-r--r--pkg/sentry/fs/fsutil/file_range_set.go8
-rw-r--r--pkg/sentry/fs/fsutil/host_file_mapper.go6
-rw-r--r--pkg/sentry/fs/fsutil/host_mappable.go13
-rw-r--r--pkg/sentry/fs/fsutil/inode_cached.go21
-rw-r--r--pkg/sentry/fs/fsutil/inode_cached_test.go51
-rw-r--r--pkg/sentry/fs/gofer/BUILD1
-rw-r--r--pkg/sentry/fs/gofer/attr.go4
-rw-r--r--pkg/sentry/fs/inotify.go3
-rw-r--r--pkg/sentry/fs/inotify_event.go9
-rw-r--r--pkg/sentry/fs/offset.go4
-rw-r--r--pkg/sentry/fs/overlay.go10
-rw-r--r--pkg/sentry/fs/proc/BUILD1
-rw-r--r--pkg/sentry/fs/proc/exec_args.go7
-rw-r--r--pkg/sentry/fs/proc/inode.go4
-rw-r--r--pkg/sentry/fs/proc/meminfo.go4
-rw-r--r--pkg/sentry/fs/proc/net.go20
-rw-r--r--pkg/sentry/fs/proc/seqfile/BUILD1
-rw-r--r--pkg/sentry/fs/proc/seqfile/seqfile.go3
-rw-r--r--pkg/sentry/fs/proc/sys_net.go21
-rw-r--r--pkg/sentry/fs/proc/task.go13
-rw-r--r--pkg/sentry/fs/proc/uid_gid_map.go3
-rw-r--r--pkg/sentry/fs/ramfs/BUILD2
-rw-r--r--pkg/sentry/fs/ramfs/tree.go4
-rw-r--r--pkg/sentry/fs/sys/BUILD2
-rw-r--r--pkg/sentry/fs/sys/sys.go6
-rw-r--r--pkg/sentry/fs/timerfd/BUILD1
-rw-r--r--pkg/sentry/fs/timerfd/timerfd.go3
-rw-r--r--pkg/sentry/fs/tmpfs/BUILD2
-rw-r--r--pkg/sentry/fs/tmpfs/file_test.go3
-rw-r--r--pkg/sentry/fs/tmpfs/inode_file.go27
-rw-r--r--pkg/sentry/fs/tmpfs/tmpfs.go16
-rw-r--r--pkg/sentry/fs/tty/BUILD1
-rw-r--r--pkg/sentry/fs/tty/dir.go3
-rw-r--r--pkg/sentry/fsimpl/cgroupfs/BUILD47
-rw-r--r--pkg/sentry/fsimpl/cgroupfs/base.go233
-rw-r--r--pkg/sentry/fsimpl/cgroupfs/cgroupfs.go412
-rw-r--r--pkg/sentry/fsimpl/cgroupfs/cpu.go70
-rw-r--r--pkg/sentry/fsimpl/cgroupfs/cpuacct.go114
-rw-r--r--pkg/sentry/fsimpl/cgroupfs/cpuset.go39
-rw-r--r--pkg/sentry/fsimpl/cgroupfs/memory.go74
-rw-r--r--pkg/sentry/fsimpl/eventfd/BUILD1
-rw-r--r--pkg/sentry/fsimpl/eventfd/eventfd.go7
-rw-r--r--pkg/sentry/fsimpl/fuse/BUILD2
-rw-r--r--pkg/sentry/fsimpl/fuse/read_write.go20
-rw-r--r--pkg/sentry/fsimpl/fuse/request_response.go20
-rw-r--r--pkg/sentry/fsimpl/fuse/utils_test.go13
-rw-r--r--pkg/sentry/fsimpl/gofer/BUILD1
-rw-r--r--pkg/sentry/fsimpl/gofer/directory.go4
-rw-r--r--pkg/sentry/fsimpl/gofer/gofer.go27
-rw-r--r--pkg/sentry/fsimpl/gofer/regular_file.go30
-rw-r--r--pkg/sentry/fsimpl/gofer/save_restore.go4
-rw-r--r--pkg/sentry/fsimpl/host/BUILD1
-rw-r--r--pkg/sentry/fsimpl/host/host.go5
-rw-r--r--pkg/sentry/fsimpl/host/save_restore.go4
-rw-r--r--pkg/sentry/fsimpl/kernfs/BUILD1
-rw-r--r--pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go10
-rw-r--r--pkg/sentry/fsimpl/kernfs/inode_impl_util.go4
-rw-r--r--pkg/sentry/fsimpl/kernfs/kernfs.go10
-rw-r--r--pkg/sentry/fsimpl/kernfs/mmap_util.go14
-rw-r--r--pkg/sentry/fsimpl/overlay/BUILD1
-rw-r--r--pkg/sentry/fsimpl/overlay/copy_up.go6
-rw-r--r--pkg/sentry/fsimpl/overlay/regular_file.go9
-rw-r--r--pkg/sentry/fsimpl/pipefs/BUILD2
-rw-r--r--pkg/sentry/fsimpl/pipefs/pipefs.go4
-rw-r--r--pkg/sentry/fsimpl/proc/BUILD1
-rw-r--r--pkg/sentry/fsimpl/proc/filesystem.go6
-rw-r--r--pkg/sentry/fsimpl/proc/task.go23
-rw-r--r--pkg/sentry/fsimpl/proc/task_files.go60
-rw-r--r--pkg/sentry/fsimpl/proc/task_net.go20
-rw-r--r--pkg/sentry/fsimpl/proc/tasks.go19
-rw-r--r--pkg/sentry/fsimpl/proc/tasks_files.go20
-rw-r--r--pkg/sentry/fsimpl/proc/tasks_sys.go11
-rw-r--r--pkg/sentry/fsimpl/proc/yama.go3
-rw-r--r--pkg/sentry/fsimpl/testutil/BUILD1
-rw-r--r--pkg/sentry/fsimpl/testutil/testutil.go4
-rw-r--r--pkg/sentry/fsimpl/timerfd/BUILD1
-rw-r--r--pkg/sentry/fsimpl/timerfd/timerfd.go3
-rw-r--r--pkg/sentry/fsimpl/tmpfs/BUILD1
-rw-r--r--pkg/sentry/fsimpl/tmpfs/regular_file.go26
-rw-r--r--pkg/sentry/fsimpl/tmpfs/tmpfs.go14
-rw-r--r--pkg/sentry/fsimpl/verity/BUILD3
-rw-r--r--pkg/sentry/fsimpl/verity/verity.go261
-rw-r--r--pkg/sentry/fsimpl/verity/verity_test.go3
-rw-r--r--pkg/sentry/hostmm/BUILD2
-rw-r--r--pkg/sentry/hostmm/hostmm.go6
-rw-r--r--pkg/sentry/kernel/BUILD6
-rw-r--r--pkg/sentry/kernel/cgroup.go281
-rw-r--r--pkg/sentry/kernel/eventfd/BUILD1
-rw-r--r--pkg/sentry/kernel/eventfd/eventfd.go7
-rw-r--r--pkg/sentry/kernel/futex/BUILD3
-rw-r--r--pkg/sentry/kernel/futex/futex.go48
-rw-r--r--pkg/sentry/kernel/futex/futex_test.go16
-rw-r--r--pkg/sentry/kernel/kcov.go8
-rw-r--r--pkg/sentry/kernel/kernel.go52
-rw-r--r--pkg/sentry/kernel/pipe/BUILD1
-rw-r--r--pkg/sentry/kernel/pipe/pipe.go6
-rw-r--r--pkg/sentry/kernel/pipe/vfs.go21
-rw-r--r--pkg/sentry/kernel/ptrace.go11
-rw-r--r--pkg/sentry/kernel/ptrace_amd64.go3
-rw-r--r--pkg/sentry/kernel/ptrace_arm64.go4
-rw-r--r--pkg/sentry/kernel/rseq.go35
-rw-r--r--pkg/sentry/kernel/seccomp.go10
-rw-r--r--pkg/sentry/kernel/shm/BUILD1
-rw-r--r--pkg/sentry/kernel/shm/shm.go30
-rw-r--r--pkg/sentry/kernel/syscalls.go8
-rw-r--r--pkg/sentry/kernel/task.go18
-rw-r--r--pkg/sentry/kernel/task_cgroup.go138
-rw-r--r--pkg/sentry/kernel/task_clone.go11
-rw-r--r--pkg/sentry/kernel/task_exit.go4
-rw-r--r--pkg/sentry/kernel/task_futex.go27
-rw-r--r--pkg/sentry/kernel/task_image.go4
-rw-r--r--pkg/sentry/kernel/task_log.go13
-rw-r--r--pkg/sentry/kernel/task_run.go8
-rw-r--r--pkg/sentry/kernel/task_signals.go16
-rw-r--r--pkg/sentry/kernel/task_start.go9
-rw-r--r--pkg/sentry/kernel/task_syscall.go12
-rw-r--r--pkg/sentry/kernel/task_usermem.go67
-rw-r--r--pkg/sentry/kernel/threads.go9
-rw-r--r--pkg/sentry/kernel/timekeeper_test.go4
-rw-r--r--pkg/sentry/kernel/vdso.go4
-rw-r--r--pkg/sentry/loader/BUILD1
-rw-r--r--pkg/sentry/loader/elf.go49
-rw-r--r--pkg/sentry/loader/loader.go11
-rw-r--r--pkg/sentry/loader/vdso.go25
-rw-r--r--pkg/sentry/memmap/BUILD3
-rw-r--r--pkg/sentry/memmap/mapping_set.go28
-rw-r--r--pkg/sentry/memmap/mapping_set_test.go65
-rw-r--r--pkg/sentry/memmap/memmap.go43
-rw-r--r--pkg/sentry/mm/BUILD14
-rw-r--r--pkg/sentry/mm/address_space.go10
-rw-r--r--pkg/sentry/mm/aio_context.go21
-rw-r--r--pkg/sentry/mm/io.go75
-rw-r--r--pkg/sentry/mm/lifecycle.go4
-rw-r--r--pkg/sentry/mm/metadata.go18
-rw-r--r--pkg/sentry/mm/mm.go24
-rw-r--r--pkg/sentry/mm/mm_test.go35
-rw-r--r--pkg/sentry/mm/pma.go74
-rw-r--r--pkg/sentry/mm/procfs.go20
-rw-r--r--pkg/sentry/mm/shm.go6
-rw-r--r--pkg/sentry/mm/special_mappable.go14
-rw-r--r--pkg/sentry/mm/syscalls.go106
-rw-r--r--pkg/sentry/mm/vma.go86
-rw-r--r--pkg/sentry/pgalloc/BUILD3
-rw-r--r--pkg/sentry/pgalloc/pgalloc.go34
-rw-r--r--pkg/sentry/pgalloc/pgalloc_test.go6
-rw-r--r--pkg/sentry/pgalloc/save_restore.go10
-rw-r--r--pkg/sentry/platform/BUILD1
-rw-r--r--pkg/sentry/platform/kvm/BUILD4
-rw-r--r--pkg/sentry/platform/kvm/address_space.go18
-rw-r--r--pkg/sentry/platform/kvm/bluepill_fault.go4
-rw-r--r--pkg/sentry/platform/kvm/context.go6
-rw-r--r--pkg/sentry/platform/kvm/kvm.go10
-rw-r--r--pkg/sentry/platform/kvm/kvm_test.go8
-rw-r--r--pkg/sentry/platform/kvm/machine.go8
-rw-r--r--pkg/sentry/platform/kvm/machine_amd64.go46
-rw-r--r--pkg/sentry/platform/kvm/machine_arm64.go16
-rw-r--r--pkg/sentry/platform/kvm/machine_arm64_unsafe.go14
-rw-r--r--pkg/sentry/platform/kvm/physical_map.go10
-rw-r--r--pkg/sentry/platform/kvm/virtual_map.go6
-rw-r--r--pkg/sentry/platform/kvm/virtual_map_test.go14
-rw-r--r--pkg/sentry/platform/mmap_min_addr.go8
-rw-r--r--pkg/sentry/platform/platform.go43
-rw-r--r--pkg/sentry/platform/ptrace/BUILD2
-rw-r--r--pkg/sentry/platform/ptrace/ptrace.go26
-rw-r--r--pkg/sentry/platform/ptrace/ptrace_unsafe.go4
-rw-r--r--pkg/sentry/platform/ptrace/stub_unsafe.go8
-rw-r--r--pkg/sentry/platform/ptrace/subprocess.go64
-rw-r--r--pkg/sentry/socket/BUILD1
-rw-r--r--pkg/sentry/socket/control/BUILD4
-rw-r--r--pkg/sentry/socket/control/control.go26
-rw-r--r--pkg/sentry/socket/control/control_test.go6
-rw-r--r--pkg/sentry/socket/hostinet/BUILD1
-rw-r--r--pkg/sentry/socket/hostinet/socket.go19
-rw-r--r--pkg/sentry/socket/hostinet/socket_unsafe.go3
-rw-r--r--pkg/sentry/socket/hostinet/stack.go10
-rw-r--r--pkg/sentry/socket/netfilter/BUILD2
-rw-r--r--pkg/sentry/socket/netfilter/extensions.go4
-rw-r--r--pkg/sentry/socket/netfilter/ipv4.go4
-rw-r--r--pkg/sentry/socket/netfilter/ipv6.go4
-rw-r--r--pkg/sentry/socket/netfilter/netfilter.go14
-rw-r--r--pkg/sentry/socket/netfilter/owner_matcher.go6
-rw-r--r--pkg/sentry/socket/netfilter/targets.go24
-rw-r--r--pkg/sentry/socket/netfilter/tcp_matcher.go6
-rw-r--r--pkg/sentry/socket/netfilter/udp_matcher.go6
-rw-r--r--pkg/sentry/socket/netlink/BUILD1
-rw-r--r--pkg/sentry/socket/netlink/message.go14
-rw-r--r--pkg/sentry/socket/netlink/socket.go9
-rw-r--r--pkg/sentry/socket/netstack/BUILD1
-rw-r--r--pkg/sentry/socket/netstack/netstack.go101
-rw-r--r--pkg/sentry/socket/netstack/netstack_vfs2.go7
-rw-r--r--pkg/sentry/socket/socket.go21
-rw-r--r--pkg/sentry/socket/unix/BUILD1
-rw-r--r--pkg/sentry/socket/unix/unix.go3
-rw-r--r--pkg/sentry/socket/unix/unix_vfs2.go3
-rw-r--r--pkg/sentry/strace/BUILD2
-rw-r--r--pkg/sentry/strace/epoll.go7
-rw-r--r--pkg/sentry/strace/poll.go5
-rw-r--r--pkg/sentry/strace/select.go5
-rw-r--r--pkg/sentry/strace/signal.go9
-rw-r--r--pkg/sentry/strace/socket.go33
-rw-r--r--pkg/sentry/strace/strace.go39
-rw-r--r--pkg/sentry/syscalls/linux/BUILD1
-rw-r--r--pkg/sentry/syscalls/linux/linux64.go6
-rw-r--r--pkg/sentry/syscalls/linux/sigset.go16
-rw-r--r--pkg/sentry/syscalls/linux/sys_aio.go17
-rw-r--r--pkg/sentry/syscalls/linux/sys_file.go34
-rw-r--r--pkg/sentry/syscalls/linux/sys_futex.go12
-rw-r--r--pkg/sentry/syscalls/linux/sys_getdents.go3
-rw-r--r--pkg/sentry/syscalls/linux/sys_mempolicy.go15
-rw-r--r--pkg/sentry/syscalls/linux/sys_mmap.go15
-rw-r--r--pkg/sentry/syscalls/linux/sys_mount.go7
-rw-r--r--pkg/sentry/syscalls/linux/sys_pipe.go4
-rw-r--r--pkg/sentry/syscalls/linux/sys_poll.go18
-rw-r--r--pkg/sentry/syscalls/linux/sys_random.go4
-rw-r--r--pkg/sentry/syscalls/linux/sys_rlimit.go6
-rw-r--r--pkg/sentry/syscalls/linux/sys_seccomp.go10
-rw-r--r--pkg/sentry/syscalls/linux/sys_sem.go6
-rw-r--r--pkg/sentry/syscalls/linux/sys_signal.go4
-rw-r--r--pkg/sentry/syscalls/linux/sys_socket.go43
-rw-r--r--pkg/sentry/syscalls/linux/sys_stat.go10
-rw-r--r--pkg/sentry/syscalls/linux/sys_thread.go9
-rw-r--r--pkg/sentry/syscalls/linux/sys_time.go12
-rw-r--r--pkg/sentry/syscalls/linux/sys_xattr.go12
-rw-r--r--pkg/sentry/syscalls/linux/timespec.go28
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/BUILD1
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/aio.go16
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/execve.go5
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/filesystem.go19
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/getdents.go21
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/mmap.go7
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/mount.go9
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/path.go5
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/pipe.go5
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/poll.go25
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/setstat.go11
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/signal.go5
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/socket.go44
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/stat.go9
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/xattr.go11
-rw-r--r--pkg/sentry/vfs/BUILD1
-rw-r--r--pkg/sentry/vfs/anonfs.go4
-rw-r--r--pkg/sentry/vfs/file_description_impl_util.go3
-rw-r--r--pkg/sentry/vfs/filesystem_impl_util.go4
-rw-r--r--pkg/sentry/vfs/inotify.go11
-rw-r--r--pkg/sentry/vfs/mount.go17
-rw-r--r--pkg/tcpip/header/ipv4.go32
-rw-r--r--pkg/tcpip/header/ipv4_test.go75
-rw-r--r--pkg/tcpip/header/ipv6.go70
-rw-r--r--pkg/tcpip/header/ipv6_test.go80
-rw-r--r--pkg/tcpip/network/internal/ip/generic_multicast_protocol.go57
-rw-r--r--pkg/tcpip/network/internal/ip/generic_multicast_protocol_test.go27
-rw-r--r--pkg/tcpip/network/ip_test.go78
-rw-r--r--pkg/tcpip/network/ipv4/igmp.go12
-rw-r--r--pkg/tcpip/network/ipv4/ipv4.go75
-rw-r--r--pkg/tcpip/network/ipv6/ipv6.go75
-rw-r--r--pkg/tcpip/network/ipv6/mld.go22
-rw-r--r--pkg/tcpip/network/ipv6/mld_test.go140
-rw-r--r--pkg/tcpip/network/ipv6/ndp.go8
-rw-r--r--pkg/tcpip/network/multicast_group_test.go6
-rw-r--r--pkg/tcpip/stack/ndp_test.go6
-rw-r--r--pkg/tcpip/tcpip.go20
-rw-r--r--pkg/tcpip/tests/integration/BUILD2
-rw-r--r--pkg/tcpip/tests/integration/forward_test.go192
-rw-r--r--pkg/tcpip/tests/integration/loopback_test.go9
-rw-r--r--pkg/tcpip/tests/integration/multicast_broadcast_test.go9
-rw-r--r--pkg/tcpip/tests/utils/utils.go8
-rw-r--r--pkg/tcpip/transport/tcp/accept.go91
-rw-r--r--pkg/tcpip/transport/tcp/connect.go20
-rw-r--r--pkg/tcpip/transport/tcp/dual_stack_test.go14
-rw-r--r--pkg/tcpip/transport/tcp/endpoint.go14
-rw-r--r--pkg/tcpip/transport/tcp/protocol.go77
-rw-r--r--pkg/tcpip/transport/tcp/tcp_sack_test.go14
-rw-r--r--pkg/tcpip/transport/tcp/tcp_test.go85
-rw-r--r--pkg/tcpip/transport/tcp/tcp_timestamp_test.go8
-rw-r--r--pkg/test/dockerutil/container.go9
-rw-r--r--pkg/usermem/BUILD24
-rw-r--r--pkg/usermem/bytes_io.go21
-rw-r--r--pkg/usermem/bytes_io_unsafe.go7
-rw-r--r--pkg/usermem/usermem.go52
-rw-r--r--pkg/usermem/usermem_test.go9
-rw-r--r--runsc/BUILD1
-rw-r--r--runsc/boot/BUILD2
-rw-r--r--runsc/boot/controller.go2
-rw-r--r--runsc/boot/fs.go48
-rw-r--r--runsc/boot/loader.go2
-rw-r--r--runsc/boot/loader_test.go17
-rw-r--r--runsc/boot/vfs.go94
-rw-r--r--runsc/cli/main.go1
-rw-r--r--runsc/cmd/BUILD1
-rw-r--r--runsc/cmd/do.go108
-rw-r--r--runsc/cmd/gofer.go6
-rw-r--r--runsc/cmd/verity_prepare.go108
-rw-r--r--runsc/config/config.go3
-rw-r--r--runsc/config/flags.go1
-rw-r--r--runsc/container/container.go2
-rw-r--r--runsc/sandbox/sandbox.go4
-rw-r--r--runsc/specutils/fs.go18
-rw-r--r--runsc/specutils/specutils.go16
-rw-r--r--shim/BUILD1
-rw-r--r--test/benchmarks/base/BUILD3
-rw-r--r--test/benchmarks/database/BUILD1
-rw-r--r--test/benchmarks/fs/BUILD2
-rw-r--r--test/benchmarks/media/BUILD1
-rw-r--r--test/benchmarks/ml/BUILD1
-rw-r--r--test/benchmarks/network/BUILD5
-rw-r--r--test/e2e/BUILD1
-rw-r--r--test/e2e/integration_test.go77
-rw-r--r--test/e2e/regression_test.go47
-rw-r--r--test/fsstress/BUILD4
-rw-r--r--test/fsstress/fsstress_test.go43
-rw-r--r--test/image/image_test.go5
-rw-r--r--test/iptables/BUILD2
-rw-r--r--test/iptables/nat.go6
-rw-r--r--test/packetimpact/runner/defs.bzl6
-rw-r--r--test/packetimpact/testbench/BUILD2
-rw-r--r--test/packetimpact/testbench/rawsockets.go4
-rw-r--r--test/packetimpact/tests/BUILD26
-rw-r--r--test/packetimpact/tests/tcp_info_test.go6
-rw-r--r--test/packetimpact/tests/tcp_listen_backlog_test.go86
-rw-r--r--test/packetimpact/tests/tcp_rack_test.go6
-rw-r--r--test/packetimpact/tests/tcp_retransmits_test.go4
-rw-r--r--test/packetimpact/tests/tcp_syncookie_test.go70
-rw-r--r--test/perf/BUILD9
-rw-r--r--test/perf/linux/getpid_benchmark.cc18
-rw-r--r--test/runtimes/defs.bzl1
-rw-r--r--test/syscalls/BUILD18
-rw-r--r--test/syscalls/linux/BUILD51
-rw-r--r--test/syscalls/linux/cgroup.cc421
-rw-r--r--test/syscalls/linux/semaphore.cc8
-rw-r--r--test/syscalls/linux/socket_inet_loopback.cc334
-rw-r--r--test/syscalls/linux/verity_ioctl.cc133
-rw-r--r--test/syscalls/linux/verity_mount.cc53
-rw-r--r--test/util/BUILD18
-rw-r--r--test/util/cgroup_util.cc223
-rw-r--r--test/util/cgroup_util.h111
-rw-r--r--test/util/fs_util.cc44
-rw-r--r--test/util/fs_util.h12
-rw-r--r--tools/go_marshal/defs.bzl2
-rw-r--r--tools/go_marshal/gomarshal/generator.go2
-rw-r--r--tools/go_marshal/gomarshal/generator_interfaces.go24
-rw-r--r--tools/go_marshal/gomarshal/generator_interfaces_array_newtype.go8
-rw-r--r--tools/go_marshal/gomarshal/generator_interfaces_dynamic.go12
-rw-r--r--tools/go_marshal/gomarshal/generator_interfaces_primitive_newtype.go38
-rw-r--r--tools/go_marshal/gomarshal/generator_interfaces_struct.go18
-rw-r--r--tools/go_marshal/gomarshal/generator_tests.go6
-rw-r--r--tools/go_marshal/test/BUILD3
-rw-r--r--tools/go_marshal/test/benchmark_test.go80
-rw-r--r--tools/go_marshal/test/escape/BUILD2
-rw-r--r--tools/go_marshal/test/escape/escape.go22
-rw-r--r--tools/go_marshal/test/marshal_test.go57
-rw-r--r--tools/nogo/analyzers.go6
-rw-r--r--tools/nogo/check/main.go17
-rw-r--r--tools/nogo/defs.bzl42
424 files changed, 7034 insertions, 2743 deletions
diff --git a/.buildkite/pipeline.yaml b/.buildkite/pipeline.yaml
index 3bc5041c0..9163db56d 100644
--- a/.buildkite/pipeline.yaml
+++ b/.buildkite/pipeline.yaml
@@ -90,6 +90,9 @@ steps:
label: ":person_in_lotus_position: KVM tests"
command: make kvm-tests
- <<: *common
+ label: ":weight_lifter: Fsstress test"
+ command: make fsstress-test
+ - <<: *common
label: ":docker: Containerd 1.3.9 tests"
command: make containerd-test-1.3.9
- <<: *common
diff --git a/Makefile b/Makefile
index 0f79b6a18..ea0674f77 100644
--- a/Makefile
+++ b/Makefile
@@ -144,6 +144,7 @@ dev: $(RUNTIME_BIN) ## Installs a set of local runtimes. Requires sudo.
@$(call configure_noreload,$(RUNTIME)-p,--net-raw --profile)
@$(call configure_noreload,$(RUNTIME)-vfs2-d,--net-raw --debug --strace --log-packets --vfs2)
@$(call configure_noreload,$(RUNTIME)-vfs2-fuse-d,--net-raw --debug --strace --log-packets --vfs2 --fuse)
+ @$(call configure_noreload,$(RUNTIME)-vfs2-cgroup-d,--net-raw --debug --strace --log-packets --vfs2 --cgroupfs)
@$(call reload_docker)
.PHONY: dev
@@ -340,7 +341,8 @@ BENCHMARKS_FILTER := .
BENCHMARKS_OPTIONS := -test.benchtime=30s
BENCHMARKS_ARGS := -test.v -test.bench=$(BENCHMARKS_FILTER) $(BENCHMARKS_OPTIONS)
BENCHMARKS_PROFILE := -pprof-dir=/tmp/profile -pprof-cpu -pprof-heap -pprof-block -pprof-mutex
-BENCH_RUNTIME_ARGS ?= --vfs2
+BENCH_VFS := --vfs2
+BENCH_RUNTIME_ARGS ?=
init-benchmark-table: ## Initializes a BigQuery table with the benchmark schema.
@$(call run,//tools/parsers:parser,init --project=$(BENCHMARKS_PROJECT) --dataset=$(BENCHMARKS_DATASET) --table=$(BENCHMARKS_TABLE))
@@ -361,13 +363,14 @@ run_benchmark = \
benchmark-platforms: load-benchmarks $(RUNTIME_BIN) ## Runs benchmarks for runc and all given platforms in BENCHMARK_PLATFORMS.
@$(foreach PLATFORM,$(BENCHMARKS_PLATFORMS), \
- $(call run_benchmark,$(PLATFORM),--platform=$(PLATFORM) $(BENCH_RUNTIME_ARGS)) && \
- ) true
+ $(call run_benchmark,$(PLATFORM),--platform=$(PLATFORM) $(BENCH_RUNTIME_ARGS) --vfs2) && \
+ $(call run_benchmark,$(PLATFORM)_vfs1,--platform=$(PLATFORM) $(BENCH_RUNTIME_ARGS)) && \
+ ) true
@$(call run_benchmark,runc)
.PHONY: benchmark-platforms
run-benchmark: load-benchmarks $(RUNTIME_BIN) ## Runs single benchmark and optionally sends data to BigQuery.
- @$(call run_benchmark,$(RUNTIME),$(BENCH_RUNTIME_ARGS))
+ @$(call run_benchmark,$(RUNTIME)$(BENCH_VFS),$(BENCH_RUNTIME_ARGS) $(BENCH_VFS))
.PHONY: run-benchmark
##
diff --git a/g3doc/architecture_guide/platforms.md b/g3doc/architecture_guide/platforms.md
index d112c9a28..e19c77236 100644
--- a/g3doc/architecture_guide/platforms.md
+++ b/g3doc/architecture_guide/platforms.md
@@ -18,8 +18,8 @@ type Context interface {
}
type AddressSpace interface {
- MapFile(addr usermem.Addr, f File, fr FileRange, at usermem.AccessType, ...) error
- Unmap(addr usermem.Addr, length uint64)
+ MapFile(addr hostarch.Addr, f File, fr FileRange, at hostarch.AccessType, ...) error
+ Unmap(addr hostarch.Addr, length uint64)
}
```
diff --git a/nogo.yaml b/nogo.yaml
index c0445a837..1e72d9e29 100644
--- a/nogo.yaml
+++ b/nogo.yaml
@@ -55,8 +55,6 @@ global:
# Same story for underscores.
- "should not use ALL_CAPS in Go names"
- "should not use underscores in Go names"
- # TODO(b/179817829): Upgrade to flock to v0.8.0.
- - "flock.NewFlock is deprecated: Use New instead"
exclude:
# Generated: exempt all.
- pkg/shim/runtimeoptions/runtimeoptions_cri.go
diff --git a/pkg/abi/linux/fs.go b/pkg/abi/linux/fs.go
index 0d921ed6f..cad24fcc7 100644
--- a/pkg/abi/linux/fs.go
+++ b/pkg/abi/linux/fs.go
@@ -19,8 +19,10 @@ package linux
// See linux/magic.h.
const (
ANON_INODE_FS_MAGIC = 0x09041934
+ CGROUP_SUPER_MAGIC = 0x27e0eb
DEVPTS_SUPER_MAGIC = 0x00001cd1
EXT_SUPER_MAGIC = 0xef53
+ FUSE_SUPER_MAGIC = 0x65735546
OVERLAYFS_SUPER_MAGIC = 0x794c7630
PIPEFS_MAGIC = 0x50495045
PROC_SUPER_MAGIC = 0x9fa0
@@ -29,7 +31,6 @@ const (
SYSFS_MAGIC = 0x62656572
TMPFS_MAGIC = 0x01021994
V9FS_MAGIC = 0x01021997
- FUSE_SUPER_MAGIC = 0x65735546
)
// Filesystem path limits, from uapi/linux/limits.h.
diff --git a/pkg/abi/linux/ptrace_amd64.go b/pkg/abi/linux/ptrace_amd64.go
index 50e22fe7e..e722971f1 100644
--- a/pkg/abi/linux/ptrace_amd64.go
+++ b/pkg/abi/linux/ptrace_amd64.go
@@ -61,3 +61,8 @@ func (p *PtraceRegs) InstructionPointer() uint64 {
func (p *PtraceRegs) StackPointer() uint64 {
return p.Rsp
}
+
+// SetStackPointer sets the stack pointer to the specified value.
+func (p *PtraceRegs) SetStackPointer(sp uint64) {
+ p.Rsp = sp
+}
diff --git a/pkg/abi/linux/ptrace_arm64.go b/pkg/abi/linux/ptrace_arm64.go
index da36811d2..3d0906565 100644
--- a/pkg/abi/linux/ptrace_arm64.go
+++ b/pkg/abi/linux/ptrace_arm64.go
@@ -38,3 +38,8 @@ func (p *PtraceRegs) InstructionPointer() uint64 {
func (p *PtraceRegs) StackPointer() uint64 {
return p.Sp
}
+
+// SetStackPointer sets the stack pointer to the specified value.
+func (p *PtraceRegs) SetStackPointer(sp uint64) {
+ p.Sp = sp
+}
diff --git a/pkg/coverage/BUILD b/pkg/coverage/BUILD
index a198e8028..ace5895f8 100644
--- a/pkg/coverage/BUILD
+++ b/pkg/coverage/BUILD
@@ -7,8 +7,8 @@ go_library(
srcs = ["coverage.go"],
visibility = ["//:sandbox"],
deps = [
+ "//pkg/hostarch",
"//pkg/sync",
- "//pkg/usermem",
"@io_bazel_rules_go//go/tools/coverdata",
],
)
diff --git a/pkg/coverage/coverage.go b/pkg/coverage/coverage.go
index 6f3d72e83..a6778a005 100644
--- a/pkg/coverage/coverage.go
+++ b/pkg/coverage/coverage.go
@@ -28,8 +28,8 @@ import (
"sort"
"testing"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/usermem"
"github.com/bazelbuild/rules_go/go/tools/coverdata"
)
@@ -141,7 +141,7 @@ func ConsumeCoverageData(w io.Writer) int {
// Non-zero coverage data found; consume it and report as a PC.
counters[index] = 0
pc := globalData.syntheticPCs[fileNum][index]
- usermem.ByteOrder.PutUint64(pcBuffer[:], pc)
+ hostarch.ByteOrder.PutUint64(pcBuffer[:], pc)
n, err := w.Write(pcBuffer[:])
if err != nil {
if err == io.EOF {
diff --git a/pkg/hostarch/BUILD b/pkg/hostarch/BUILD
new file mode 100644
index 000000000..1e8def4d9
--- /dev/null
+++ b/pkg/hostarch/BUILD
@@ -0,0 +1,42 @@
+load("//tools:defs.bzl", "go_library", "go_test")
+load("//tools/go_generics:defs.bzl", "go_template_instance")
+
+package(licenses = ["notice"])
+
+go_template_instance(
+ name = "addr_range",
+ out = "addr_range.go",
+ package = "hostarch",
+ prefix = "Addr",
+ template = "//pkg/segment:generic_range",
+ types = {
+ "T": "Addr",
+ },
+)
+
+go_test(
+ name = "hostarch_test",
+ size = "small",
+ srcs = [
+ "addr_range_seq_test.go",
+ ],
+ library = ":hostarch",
+)
+
+go_library(
+ name = "hostarch",
+ srcs = [
+ "access_type.go",
+ "addr.go",
+ "addr_range.go",
+ "addr_range_seq_unsafe.go",
+ "hostarch.go",
+ "hostarch_arm64.go",
+ "hostarch_x86.go",
+ ],
+ visibility = ["//:sandbox"],
+ deps = [
+ "//pkg/gohacks",
+ "@org_golang_x_sys//unix:go_default_library",
+ ],
+)
diff --git a/pkg/usermem/access_type.go b/pkg/hostarch/access_type.go
index 2cfca29af..e30476840 100644
--- a/pkg/usermem/access_type.go
+++ b/pkg/hostarch/access_type.go
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-package usermem
+package hostarch
import "golang.org/x/sys/unix"
diff --git a/pkg/usermem/addr.go b/pkg/hostarch/addr.go
index c4100481e..0cf0f3c81 100644
--- a/pkg/usermem/addr.go
+++ b/pkg/hostarch/addr.go
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-package usermem
+package hostarch
import (
"fmt"
@@ -57,7 +57,7 @@ func (v Addr) RoundUp() (addr Addr, ok bool) {
func (v Addr) MustRoundUp() Addr {
addr, ok := v.RoundUp()
if !ok {
- panic(fmt.Sprintf("usermem.Addr(%d).RoundUp() wraps", v))
+ panic(fmt.Sprintf("hostarch.Addr(%d).RoundUp() wraps", v))
}
return addr
}
diff --git a/pkg/usermem/addr_range_seq_test.go b/pkg/hostarch/addr_range_seq_test.go
index 82f735026..5726dfd19 100644
--- a/pkg/usermem/addr_range_seq_test.go
+++ b/pkg/hostarch/addr_range_seq_test.go
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-package usermem
+package hostarch
import (
"testing"
diff --git a/pkg/usermem/addr_range_seq_unsafe.go b/pkg/hostarch/addr_range_seq_unsafe.go
index c9a1415a0..ecc17d595 100644
--- a/pkg/usermem/addr_range_seq_unsafe.go
+++ b/pkg/hostarch/addr_range_seq_unsafe.go
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-package usermem
+package hostarch
import (
"bytes"
diff --git a/pkg/hostarch/hostarch.go b/pkg/hostarch/hostarch.go
new file mode 100644
index 000000000..fdd29c567
--- /dev/null
+++ b/pkg/hostarch/hostarch.go
@@ -0,0 +1,7 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package hostarch contains host arch address operations for user memory.
+package hostarch
diff --git a/pkg/usermem/usermem_arm64.go b/pkg/hostarch/hostarch_arm64.go
index 7e7529585..a31a8aeeb 100644
--- a/pkg/usermem/usermem_arm64.go
+++ b/pkg/hostarch/hostarch_arm64.go
@@ -14,7 +14,7 @@
// +build arm64
-package usermem
+package hostarch
import (
"encoding/binary"
diff --git a/pkg/usermem/usermem_x86.go b/pkg/hostarch/hostarch_x86.go
index d96f829fb..af6ef2b7f 100644
--- a/pkg/usermem/usermem_x86.go
+++ b/pkg/hostarch/hostarch_x86.go
@@ -14,7 +14,7 @@
// +build amd64 386
-package usermem
+package hostarch
import "encoding/binary"
diff --git a/pkg/marshal/BUILD b/pkg/marshal/BUILD
index aac0161fa..7cd89e639 100644
--- a/pkg/marshal/BUILD
+++ b/pkg/marshal/BUILD
@@ -11,5 +11,5 @@ go_library(
visibility = [
"//:sandbox",
],
- deps = ["//pkg/usermem"],
+ deps = ["//pkg/hostarch"],
)
diff --git a/pkg/marshal/marshal.go b/pkg/marshal/marshal.go
index d8cb44b40..eb036feae 100644
--- a/pkg/marshal/marshal.go
+++ b/pkg/marshal/marshal.go
@@ -23,7 +23,7 @@ package marshal
import (
"io"
- "gvisor.dev/gvisor/pkg/usermem"
+ "gvisor.dev/gvisor/pkg/hostarch"
)
// CopyContext defines the memory operations required to marshal to and from
@@ -36,11 +36,11 @@ type CopyContext interface {
// CopyOutBytes writes the contents of b to the task's memory. See
// kernel.CopyOutBytes.
- CopyOutBytes(addr usermem.Addr, b []byte) (int, error)
+ CopyOutBytes(addr hostarch.Addr, b []byte) (int, error)
// CopyInBytes reads the contents of the task's memory to b. See
// kernel.CopyInBytes.
- CopyInBytes(addr usermem.Addr, b []byte) (int, error)
+ CopyInBytes(addr hostarch.Addr, b []byte) (int, error)
}
// Marshallable represents operations on a type that can be marshalled to and
@@ -108,7 +108,7 @@ type Marshallable interface {
// If the copy-in from the task memory is only partially successful, CopyIn
// should still attempt to deserialize as much data as possible. See comment
// for UnmarshalBytes.
- CopyIn(cc CopyContext, addr usermem.Addr) (int, error)
+ CopyIn(cc CopyContext, addr hostarch.Addr) (int, error)
// CopyOut serializes a Marshallable type to a task's memory. This may only
// be called from a task goroutine. This is more efficient than calling
@@ -119,7 +119,7 @@ type Marshallable interface {
// The copy-out to the task memory may be partially successful, in which
// case CopyOut returns how much data was serialized. See comment for
// MarshalBytes for implications.
- CopyOut(cc CopyContext, addr usermem.Addr) (int, error)
+ CopyOut(cc CopyContext, addr hostarch.Addr) (int, error)
// CopyOutN is like CopyOut, but explicitly requests a partial
// copy-out. Note that this may yield unexpected results for non-packed
@@ -127,7 +127,7 @@ type Marshallable interface {
// comment on MarshalBytes.
//
// The limit must be less than or equal to SizeBytes().
- CopyOutN(cc CopyContext, addr usermem.Addr, limit int) (int, error)
+ CopyOutN(cc CopyContext, addr hostarch.Addr, limit int) (int, error)
}
// go-marshal generates additional functions for a type based on additional
@@ -157,10 +157,10 @@ type Marshallable interface {
// func UnmarshalUnsafeFooSlice(dst []Foo, src []byte) (int, error) { ... }
//
// // CopyFooSliceIn copies in a slice of Foo objects from the task's memory.
-// func CopyFooSliceIn(cc marshal.CopyContext, addr usermem.Addr, dst []Foo) (int, error) { ... }
+// func CopyFooSliceIn(cc marshal.CopyContext, addr hostarch.Addr, dst []Foo) (int, error) { ... }
//
// // CopyFooSliceIn copies out a slice of Foo objects to the task's memory.
-// func CopyFooSliceOut(cc marshal.CopyContext, addr usermem.Addr, src []Foo) (int, error) { ... }
+// func CopyFooSliceOut(cc marshal.CopyContext, addr hostarch.Addr, src []Foo) (int, error) { ... }
//
// The name of the functions are of the format "Copy%sIn" and "Copy%sOut", where
// %s is the first argument to the slice clause. This directive is not supported
@@ -175,10 +175,10 @@ type Marshallable interface {
// This is only valid on newtypes on primitives, and causes the generated
// functions to accept slices of the inner type instead:
//
-// func CopyInt32SliceIn(cc marshal.CopyContext, addr usermem.Addr, dst []int32) (int, error) { ... }
+// func CopyInt32SliceIn(cc marshal.CopyContext, addr hostarch.Addr, dst []int32) (int, error) { ... }
//
// Without "inner", they would instead be:
//
-// func CopyInt32SliceIn(cc marshal.CopyContext, addr usermem.Addr, dst []Int32) (int, error) { ... }
+// func CopyInt32SliceIn(cc marshal.CopyContext, addr hostarch.Addr, dst []Int32) (int, error) { ... }
//
// This may help avoid a cast depending on how the generated functions are used.
diff --git a/pkg/marshal/marshal_impl_util.go b/pkg/marshal/marshal_impl_util.go
index ea75e09f2..9e6a6fa29 100644
--- a/pkg/marshal/marshal_impl_util.go
+++ b/pkg/marshal/marshal_impl_util.go
@@ -17,7 +17,7 @@ package marshal
import (
"io"
- "gvisor.dev/gvisor/pkg/usermem"
+ "gvisor.dev/gvisor/pkg/hostarch"
)
// StubMarshallable implements the Marshallable interface.
@@ -63,16 +63,16 @@ func (StubMarshallable) UnmarshalUnsafe(src []byte) {
}
// CopyIn implements Marshallable.CopyIn.
-func (StubMarshallable) CopyIn(cc CopyContext, addr usermem.Addr) (int, error) {
+func (StubMarshallable) CopyIn(cc CopyContext, addr hostarch.Addr) (int, error) {
panic("Please implement your own CopyIn function")
}
// CopyOut implements Marshallable.CopyOut.
-func (StubMarshallable) CopyOut(cc CopyContext, addr usermem.Addr) (int, error) {
+func (StubMarshallable) CopyOut(cc CopyContext, addr hostarch.Addr) (int, error) {
panic("Please implement your own CopyOut function")
}
// CopyOutN implements Marshallable.CopyOutN.
-func (StubMarshallable) CopyOutN(cc CopyContext, addr usermem.Addr, limit int) (int, error) {
+func (StubMarshallable) CopyOutN(cc CopyContext, addr hostarch.Addr, limit int) (int, error) {
panic("Please implement your own CopyOutN function")
}
diff --git a/pkg/marshal/primitive/BUILD b/pkg/marshal/primitive/BUILD
index d77a11c79..190b57c29 100644
--- a/pkg/marshal/primitive/BUILD
+++ b/pkg/marshal/primitive/BUILD
@@ -13,6 +13,7 @@ go_library(
],
deps = [
"//pkg/context",
+ "//pkg/hostarch",
"//pkg/marshal",
"//pkg/usermem",
],
diff --git a/pkg/marshal/primitive/primitive.go b/pkg/marshal/primitive/primitive.go
index 4b342de6b..32c8ed138 100644
--- a/pkg/marshal/primitive/primitive.go
+++ b/pkg/marshal/primitive/primitive.go
@@ -20,6 +20,7 @@ import (
"io"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/marshal"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -102,17 +103,17 @@ func (b *ByteSlice) UnmarshalUnsafe(src []byte) {
}
// CopyIn implements marshal.Marshallable.CopyIn.
-func (b *ByteSlice) CopyIn(cc marshal.CopyContext, addr usermem.Addr) (int, error) {
+func (b *ByteSlice) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {
return cc.CopyInBytes(addr, *b)
}
// CopyOut implements marshal.Marshallable.CopyOut.
-func (b *ByteSlice) CopyOut(cc marshal.CopyContext, addr usermem.Addr) (int, error) {
+func (b *ByteSlice) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {
return cc.CopyOutBytes(addr, *b)
}
// CopyOutN implements marshal.Marshallable.CopyOutN.
-func (b *ByteSlice) CopyOutN(cc marshal.CopyContext, addr usermem.Addr, limit int) (int, error) {
+func (b *ByteSlice) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) {
return cc.CopyOutBytes(addr, (*b)[:limit])
}
@@ -131,7 +132,7 @@ var _ marshal.Marshallable = (*ByteSlice)(nil)
// CopyInt8In is a convenient wrapper for copying in an int8 from the task's
// memory.
-func CopyInt8In(cc marshal.CopyContext, addr usermem.Addr, dst *int8) (int, error) {
+func CopyInt8In(cc marshal.CopyContext, addr hostarch.Addr, dst *int8) (int, error) {
var buf Int8
n, err := buf.CopyIn(cc, addr)
if err != nil {
@@ -143,14 +144,14 @@ func CopyInt8In(cc marshal.CopyContext, addr usermem.Addr, dst *int8) (int, erro
// CopyInt8Out is a convenient wrapper for copying out an int8 to the task's
// memory.
-func CopyInt8Out(cc marshal.CopyContext, addr usermem.Addr, src int8) (int, error) {
+func CopyInt8Out(cc marshal.CopyContext, addr hostarch.Addr, src int8) (int, error) {
srcP := Int8(src)
return srcP.CopyOut(cc, addr)
}
// CopyUint8In is a convenient wrapper for copying in a uint8 from the task's
// memory.
-func CopyUint8In(cc marshal.CopyContext, addr usermem.Addr, dst *uint8) (int, error) {
+func CopyUint8In(cc marshal.CopyContext, addr hostarch.Addr, dst *uint8) (int, error) {
var buf Uint8
n, err := buf.CopyIn(cc, addr)
if err != nil {
@@ -162,7 +163,7 @@ func CopyUint8In(cc marshal.CopyContext, addr usermem.Addr, dst *uint8) (int, er
// CopyUint8Out is a convenient wrapper for copying out a uint8 to the task's
// memory.
-func CopyUint8Out(cc marshal.CopyContext, addr usermem.Addr, src uint8) (int, error) {
+func CopyUint8Out(cc marshal.CopyContext, addr hostarch.Addr, src uint8) (int, error) {
srcP := Uint8(src)
return srcP.CopyOut(cc, addr)
}
@@ -171,7 +172,7 @@ func CopyUint8Out(cc marshal.CopyContext, addr usermem.Addr, src uint8) (int, er
// CopyInt16In is a convenient wrapper for copying in an int16 from the task's
// memory.
-func CopyInt16In(cc marshal.CopyContext, addr usermem.Addr, dst *int16) (int, error) {
+func CopyInt16In(cc marshal.CopyContext, addr hostarch.Addr, dst *int16) (int, error) {
var buf Int16
n, err := buf.CopyIn(cc, addr)
if err != nil {
@@ -183,14 +184,14 @@ func CopyInt16In(cc marshal.CopyContext, addr usermem.Addr, dst *int16) (int, er
// CopyInt16Out is a convenient wrapper for copying out an int16 to the task's
// memory.
-func CopyInt16Out(cc marshal.CopyContext, addr usermem.Addr, src int16) (int, error) {
+func CopyInt16Out(cc marshal.CopyContext, addr hostarch.Addr, src int16) (int, error) {
srcP := Int16(src)
return srcP.CopyOut(cc, addr)
}
// CopyUint16In is a convenient wrapper for copying in a uint16 from the task's
// memory.
-func CopyUint16In(cc marshal.CopyContext, addr usermem.Addr, dst *uint16) (int, error) {
+func CopyUint16In(cc marshal.CopyContext, addr hostarch.Addr, dst *uint16) (int, error) {
var buf Uint16
n, err := buf.CopyIn(cc, addr)
if err != nil {
@@ -202,7 +203,7 @@ func CopyUint16In(cc marshal.CopyContext, addr usermem.Addr, dst *uint16) (int,
// CopyUint16Out is a convenient wrapper for copying out a uint16 to the task's
// memory.
-func CopyUint16Out(cc marshal.CopyContext, addr usermem.Addr, src uint16) (int, error) {
+func CopyUint16Out(cc marshal.CopyContext, addr hostarch.Addr, src uint16) (int, error) {
srcP := Uint16(src)
return srcP.CopyOut(cc, addr)
}
@@ -211,7 +212,7 @@ func CopyUint16Out(cc marshal.CopyContext, addr usermem.Addr, src uint16) (int,
// CopyInt32In is a convenient wrapper for copying in an int32 from the task's
// memory.
-func CopyInt32In(cc marshal.CopyContext, addr usermem.Addr, dst *int32) (int, error) {
+func CopyInt32In(cc marshal.CopyContext, addr hostarch.Addr, dst *int32) (int, error) {
var buf Int32
n, err := buf.CopyIn(cc, addr)
if err != nil {
@@ -223,14 +224,14 @@ func CopyInt32In(cc marshal.CopyContext, addr usermem.Addr, dst *int32) (int, er
// CopyInt32Out is a convenient wrapper for copying out an int32 to the task's
// memory.
-func CopyInt32Out(cc marshal.CopyContext, addr usermem.Addr, src int32) (int, error) {
+func CopyInt32Out(cc marshal.CopyContext, addr hostarch.Addr, src int32) (int, error) {
srcP := Int32(src)
return srcP.CopyOut(cc, addr)
}
// CopyUint32In is a convenient wrapper for copying in a uint32 from the task's
// memory.
-func CopyUint32In(cc marshal.CopyContext, addr usermem.Addr, dst *uint32) (int, error) {
+func CopyUint32In(cc marshal.CopyContext, addr hostarch.Addr, dst *uint32) (int, error) {
var buf Uint32
n, err := buf.CopyIn(cc, addr)
if err != nil {
@@ -242,7 +243,7 @@ func CopyUint32In(cc marshal.CopyContext, addr usermem.Addr, dst *uint32) (int,
// CopyUint32Out is a convenient wrapper for copying out a uint32 to the task's
// memory.
-func CopyUint32Out(cc marshal.CopyContext, addr usermem.Addr, src uint32) (int, error) {
+func CopyUint32Out(cc marshal.CopyContext, addr hostarch.Addr, src uint32) (int, error) {
srcP := Uint32(src)
return srcP.CopyOut(cc, addr)
}
@@ -251,7 +252,7 @@ func CopyUint32Out(cc marshal.CopyContext, addr usermem.Addr, src uint32) (int,
// CopyInt64In is a convenient wrapper for copying in an int64 from the task's
// memory.
-func CopyInt64In(cc marshal.CopyContext, addr usermem.Addr, dst *int64) (int, error) {
+func CopyInt64In(cc marshal.CopyContext, addr hostarch.Addr, dst *int64) (int, error) {
var buf Int64
n, err := buf.CopyIn(cc, addr)
if err != nil {
@@ -263,14 +264,14 @@ func CopyInt64In(cc marshal.CopyContext, addr usermem.Addr, dst *int64) (int, er
// CopyInt64Out is a convenient wrapper for copying out an int64 to the task's
// memory.
-func CopyInt64Out(cc marshal.CopyContext, addr usermem.Addr, src int64) (int, error) {
+func CopyInt64Out(cc marshal.CopyContext, addr hostarch.Addr, src int64) (int, error) {
srcP := Int64(src)
return srcP.CopyOut(cc, addr)
}
// CopyUint64In is a convenient wrapper for copying in a uint64 from the task's
// memory.
-func CopyUint64In(cc marshal.CopyContext, addr usermem.Addr, dst *uint64) (int, error) {
+func CopyUint64In(cc marshal.CopyContext, addr hostarch.Addr, dst *uint64) (int, error) {
var buf Uint64
n, err := buf.CopyIn(cc, addr)
if err != nil {
@@ -282,14 +283,14 @@ func CopyUint64In(cc marshal.CopyContext, addr usermem.Addr, dst *uint64) (int,
// CopyUint64Out is a convenient wrapper for copying out a uint64 to the task's
// memory.
-func CopyUint64Out(cc marshal.CopyContext, addr usermem.Addr, src uint64) (int, error) {
+func CopyUint64Out(cc marshal.CopyContext, addr hostarch.Addr, src uint64) (int, error) {
srcP := Uint64(src)
return srcP.CopyOut(cc, addr)
}
// CopyByteSliceIn is a convenient wrapper for copying in a []byte from the
// task's memory.
-func CopyByteSliceIn(cc marshal.CopyContext, addr usermem.Addr, dst *[]byte) (int, error) {
+func CopyByteSliceIn(cc marshal.CopyContext, addr hostarch.Addr, dst *[]byte) (int, error) {
var buf ByteSlice
n, err := buf.CopyIn(cc, addr)
if err != nil {
@@ -301,14 +302,14 @@ func CopyByteSliceIn(cc marshal.CopyContext, addr usermem.Addr, dst *[]byte) (in
// CopyByteSliceOut is a convenient wrapper for copying out a []byte to the
// task's memory.
-func CopyByteSliceOut(cc marshal.CopyContext, addr usermem.Addr, src []byte) (int, error) {
+func CopyByteSliceOut(cc marshal.CopyContext, addr hostarch.Addr, src []byte) (int, error) {
srcP := ByteSlice(src)
return srcP.CopyOut(cc, addr)
}
// CopyStringIn is a convenient wrapper for copying in a string from the
// task's memory.
-func CopyStringIn(cc marshal.CopyContext, addr usermem.Addr, dst *string) (int, error) {
+func CopyStringIn(cc marshal.CopyContext, addr hostarch.Addr, dst *string) (int, error) {
var buf ByteSlice
n, err := buf.CopyIn(cc, addr)
if err != nil {
@@ -320,12 +321,12 @@ func CopyStringIn(cc marshal.CopyContext, addr usermem.Addr, dst *string) (int,
// CopyStringOut is a convenient wrapper for copying out a string to the task's
// memory.
-func CopyStringOut(cc marshal.CopyContext, addr usermem.Addr, src string) (int, error) {
+func CopyStringOut(cc marshal.CopyContext, addr hostarch.Addr, src string) (int, error) {
srcP := ByteSlice(src)
return srcP.CopyOut(cc, addr)
}
// IOCopyContext wraps an object implementing usermem.IO to implement
// marshal.CopyContext.
type IOCopyContext struct {
Ctx context.Context
@@ -339,11 +340,11 @@ func (i *IOCopyContext) CopyScratchBuffer(size int) []byte {
}
// CopyOutBytes implements marshal.CopyContext.CopyOutBytes.
-func (i *IOCopyContext) CopyOutBytes(addr usermem.Addr, b []byte) (int, error) {
+func (i *IOCopyContext) CopyOutBytes(addr hostarch.Addr, b []byte) (int, error) {
return i.IO.CopyOut(i.Ctx, addr, b, i.Opts)
}
// CopyInBytes implements marshal.CopyContext.CopyInBytes.
-func (i *IOCopyContext) CopyInBytes(addr usermem.Addr, b []byte) (int, error) {
+func (i *IOCopyContext) CopyInBytes(addr hostarch.Addr, b []byte) (int, error) {
return i.IO.CopyIn(i.Ctx, addr, b, i.Opts)
}
diff --git a/pkg/merkletree/BUILD b/pkg/merkletree/BUILD
index 501a9ef21..dcd6c3bf5 100644
--- a/pkg/merkletree/BUILD
+++ b/pkg/merkletree/BUILD
@@ -8,7 +8,7 @@ go_library(
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
- "//pkg/usermem",
+ "//pkg/hostarch",
],
)
@@ -18,6 +18,6 @@ go_test(
library = ":merkletree",
deps = [
"//pkg/abi/linux",
- "//pkg/usermem",
+ "//pkg/hostarch",
],
)
diff --git a/pkg/merkletree/merkletree.go b/pkg/merkletree/merkletree.go
index d7209ace3..6450f664c 100644
--- a/pkg/merkletree/merkletree.go
+++ b/pkg/merkletree/merkletree.go
@@ -24,7 +24,8 @@ import (
"io"
"gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/usermem"
+
+ "gvisor.dev/gvisor/pkg/hostarch"
)
const (
@@ -65,7 +66,7 @@ type Layout struct {
// of a tree. dataSize specifies the size of input data in bytes.
func InitLayout(dataSize int64, hashAlgorithms int, dataAndTreeInSameFile bool) (Layout, error) {
layout := Layout{
- blockSize: usermem.PageSize,
+ blockSize: hostarch.PageSize,
}
// TODO(b/156980949): Allow config SHA384.
@@ -237,6 +238,7 @@ func Generate(params *GenerateParams) ([]byte, error) {
Mode: params.Mode,
UID: params.UID,
GID: params.GID,
+ Children: params.Children,
SymlinkTarget: params.SymlinkTarget,
}
diff --git a/pkg/merkletree/merkletree_test.go b/pkg/merkletree/merkletree_test.go
index ed332b3f1..5d6f8df1b 100644
--- a/pkg/merkletree/merkletree_test.go
+++ b/pkg/merkletree/merkletree_test.go
@@ -24,7 +24,8 @@ import (
"time"
"gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/usermem"
+
+ "gvisor.dev/gvisor/pkg/hostarch"
)
func TestLayout(t *testing.T) {
@@ -58,7 +59,7 @@ func TestLayout(t *testing.T) {
hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA256,
dataAndTreeInSameFile: true,
expectedDigestSize: 32,
- expectedLevelOffset: []int64{usermem.PageSize},
+ expectedLevelOffset: []int64{hostarch.PageSize},
},
{
name: "SmallSizeSHA512SameFile",
@@ -66,7 +67,7 @@ func TestLayout(t *testing.T) {
hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA512,
dataAndTreeInSameFile: true,
expectedDigestSize: 64,
- expectedLevelOffset: []int64{usermem.PageSize},
+ expectedLevelOffset: []int64{hostarch.PageSize},
},
{
name: "MiddleSizeSHA256SeparateFile",
@@ -74,7 +75,7 @@ func TestLayout(t *testing.T) {
hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA256,
dataAndTreeInSameFile: false,
expectedDigestSize: 32,
- expectedLevelOffset: []int64{0, 2 * usermem.PageSize, 3 * usermem.PageSize},
+ expectedLevelOffset: []int64{0, 2 * hostarch.PageSize, 3 * hostarch.PageSize},
},
{
name: "MiddleSizeSHA512SeparateFile",
@@ -82,7 +83,7 @@ func TestLayout(t *testing.T) {
hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA512,
dataAndTreeInSameFile: false,
expectedDigestSize: 64,
- expectedLevelOffset: []int64{0, 4 * usermem.PageSize, 5 * usermem.PageSize},
+ expectedLevelOffset: []int64{0, 4 * hostarch.PageSize, 5 * hostarch.PageSize},
},
{
name: "MiddleSizeSHA256SameFile",
@@ -90,7 +91,7 @@ func TestLayout(t *testing.T) {
hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA256,
dataAndTreeInSameFile: true,
expectedDigestSize: 32,
- expectedLevelOffset: []int64{245 * usermem.PageSize, 247 * usermem.PageSize, 248 * usermem.PageSize},
+ expectedLevelOffset: []int64{245 * hostarch.PageSize, 247 * hostarch.PageSize, 248 * hostarch.PageSize},
},
{
name: "MiddleSizeSHA512SameFile",
@@ -98,39 +99,39 @@ func TestLayout(t *testing.T) {
hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA512,
dataAndTreeInSameFile: true,
expectedDigestSize: 64,
- expectedLevelOffset: []int64{245 * usermem.PageSize, 249 * usermem.PageSize, 250 * usermem.PageSize},
+ expectedLevelOffset: []int64{245 * hostarch.PageSize, 249 * hostarch.PageSize, 250 * hostarch.PageSize},
},
{
name: "LargeSizeSHA256SeparateFile",
- dataSize: 4096 * int64(usermem.PageSize),
+ dataSize: 4096 * int64(hostarch.PageSize),
hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA256,
dataAndTreeInSameFile: false,
expectedDigestSize: 32,
- expectedLevelOffset: []int64{0, 32 * usermem.PageSize, 33 * usermem.PageSize},
+ expectedLevelOffset: []int64{0, 32 * hostarch.PageSize, 33 * hostarch.PageSize},
},
{
name: "LargeSizeSHA512SeparateFile",
- dataSize: 4096 * int64(usermem.PageSize),
+ dataSize: 4096 * int64(hostarch.PageSize),
hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA512,
dataAndTreeInSameFile: false,
expectedDigestSize: 64,
- expectedLevelOffset: []int64{0, 64 * usermem.PageSize, 65 * usermem.PageSize},
+ expectedLevelOffset: []int64{0, 64 * hostarch.PageSize, 65 * hostarch.PageSize},
},
{
name: "LargeSizeSHA256SameFile",
- dataSize: 4096 * int64(usermem.PageSize),
+ dataSize: 4096 * int64(hostarch.PageSize),
hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA256,
dataAndTreeInSameFile: true,
expectedDigestSize: 32,
- expectedLevelOffset: []int64{4096 * usermem.PageSize, 4128 * usermem.PageSize, 4129 * usermem.PageSize},
+ expectedLevelOffset: []int64{4096 * hostarch.PageSize, 4128 * hostarch.PageSize, 4129 * hostarch.PageSize},
},
{
name: "LargeSizeSHA512SameFile",
- dataSize: 4096 * int64(usermem.PageSize),
+ dataSize: 4096 * int64(hostarch.PageSize),
hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA512,
dataAndTreeInSameFile: true,
expectedDigestSize: 64,
- expectedLevelOffset: []int64{4096 * usermem.PageSize, 4160 * usermem.PageSize, 4161 * usermem.PageSize},
+ expectedLevelOffset: []int64{4096 * hostarch.PageSize, 4160 * hostarch.PageSize, 4161 * hostarch.PageSize},
},
}
@@ -140,8 +141,8 @@ func TestLayout(t *testing.T) {
if err != nil {
t.Fatalf("Failed to InitLayout: %v", err)
}
- if l.blockSize != int64(usermem.PageSize) {
- t.Errorf("Got blockSize %d, want %d", l.blockSize, usermem.PageSize)
+ if l.blockSize != int64(hostarch.PageSize) {
+ t.Errorf("Got blockSize %d, want %d", l.blockSize, hostarch.PageSize)
}
if l.digestSize != tc.expectedDigestSize {
t.Errorf("Got digestSize %d, want %d", l.digestSize, sha256DigestSize)
@@ -202,56 +203,56 @@ func TestGenerate(t *testing.T) {
}{
{
name: "OnePageZeroesSHA256SeparateFile",
- data: bytes.Repeat([]byte{0}, usermem.PageSize),
+ data: bytes.Repeat([]byte{0}, hostarch.PageSize),
hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA256,
dataAndTreeInSameFile: false,
expectedHash: []byte{9, 115, 238, 230, 38, 140, 195, 70, 207, 144, 202, 118, 23, 113, 32, 129, 226, 239, 177, 69, 161, 26, 14, 113, 16, 37, 30, 96, 19, 148, 132, 27},
},
{
name: "OnePageZeroesSHA256SameFile",
- data: bytes.Repeat([]byte{0}, usermem.PageSize),
+ data: bytes.Repeat([]byte{0}, hostarch.PageSize),
hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA256,
dataAndTreeInSameFile: true,
expectedHash: []byte{9, 115, 238, 230, 38, 140, 195, 70, 207, 144, 202, 118, 23, 113, 32, 129, 226, 239, 177, 69, 161, 26, 14, 113, 16, 37, 30, 96, 19, 148, 132, 27},
},
{
name: "OnePageZeroesSHA512SeparateFile",
- data: bytes.Repeat([]byte{0}, usermem.PageSize),
+ data: bytes.Repeat([]byte{0}, hostarch.PageSize),
hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA512,
dataAndTreeInSameFile: false,
expectedHash: []byte{127, 8, 95, 11, 83, 101, 51, 39, 170, 235, 39, 43, 135, 243, 145, 118, 148, 58, 27, 155, 182, 205, 44, 47, 5, 223, 215, 17, 35, 16, 43, 104, 43, 11, 8, 88, 171, 7, 249, 243, 14, 62, 126, 218, 23, 159, 237, 237, 42, 226, 39, 25, 87, 48, 253, 191, 116, 213, 37, 3, 187, 152, 154, 14},
},
{
name: "OnePageZeroesSHA512SameFile",
- data: bytes.Repeat([]byte{0}, usermem.PageSize),
+ data: bytes.Repeat([]byte{0}, hostarch.PageSize),
hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA512,
dataAndTreeInSameFile: true,
expectedHash: []byte{127, 8, 95, 11, 83, 101, 51, 39, 170, 235, 39, 43, 135, 243, 145, 118, 148, 58, 27, 155, 182, 205, 44, 47, 5, 223, 215, 17, 35, 16, 43, 104, 43, 11, 8, 88, 171, 7, 249, 243, 14, 62, 126, 218, 23, 159, 237, 237, 42, 226, 39, 25, 87, 48, 253, 191, 116, 213, 37, 3, 187, 152, 154, 14},
},
{
name: "MultiplePageZeroesSHA256SeparateFile",
- data: bytes.Repeat([]byte{0}, 128*usermem.PageSize+1),
+ data: bytes.Repeat([]byte{0}, 128*hostarch.PageSize+1),
hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA256,
dataAndTreeInSameFile: false,
expectedHash: []byte{247, 158, 42, 215, 180, 106, 0, 28, 77, 64, 132, 162, 74, 65, 250, 161, 243, 66, 129, 44, 197, 8, 145, 14, 94, 206, 156, 184, 145, 145, 20, 185},
},
{
name: "MultiplePageZeroesSHA256SameFile",
- data: bytes.Repeat([]byte{0}, 128*usermem.PageSize+1),
+ data: bytes.Repeat([]byte{0}, 128*hostarch.PageSize+1),
hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA256,
dataAndTreeInSameFile: true,
expectedHash: []byte{247, 158, 42, 215, 180, 106, 0, 28, 77, 64, 132, 162, 74, 65, 250, 161, 243, 66, 129, 44, 197, 8, 145, 14, 94, 206, 156, 184, 145, 145, 20, 185},
},
{
name: "MultiplePageZeroesSHA512SeparateFile",
- data: bytes.Repeat([]byte{0}, 128*usermem.PageSize+1),
+ data: bytes.Repeat([]byte{0}, 128*hostarch.PageSize+1),
hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA512,
dataAndTreeInSameFile: false,
expectedHash: []byte{100, 121, 14, 30, 104, 200, 142, 182, 190, 78, 23, 68, 157, 174, 23, 75, 174, 250, 250, 25, 66, 45, 235, 103, 129, 49, 78, 127, 173, 154, 121, 35, 37, 115, 60, 217, 26, 205, 253, 253, 236, 145, 107, 109, 232, 19, 72, 92, 4, 191, 181, 205, 191, 57, 234, 177, 144, 235, 143, 30, 15, 197, 109, 81},
},
{
name: "MultiplePageZeroesSHA512SameFile",
- data: bytes.Repeat([]byte{0}, 128*usermem.PageSize+1),
+ data: bytes.Repeat([]byte{0}, 128*hostarch.PageSize+1),
hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA512,
dataAndTreeInSameFile: true,
expectedHash: []byte{100, 121, 14, 30, 104, 200, 142, 182, 190, 78, 23, 68, 157, 174, 23, 75, 174, 250, 250, 25, 66, 45, 235, 103, 129, 49, 78, 127, 173, 154, 121, 35, 37, 115, 60, 217, 26, 205, 253, 253, 236, 145, 107, 109, 232, 19, 72, 92, 4, 191, 181, 205, 191, 57, 234, 177, 144, 235, 143, 30, 15, 197, 109, 81},
@@ -286,28 +287,28 @@ func TestGenerate(t *testing.T) {
},
{
name: "OnePageASHA256SeparateFile",
- data: bytes.Repeat([]byte{'a'}, usermem.PageSize),
+ data: bytes.Repeat([]byte{'a'}, hostarch.PageSize),
hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA256,
dataAndTreeInSameFile: false,
expectedHash: []byte{132, 54, 112, 142, 156, 19, 50, 140, 138, 240, 192, 154, 100, 120, 242, 69, 64, 217, 62, 166, 127, 88, 23, 197, 100, 66, 255, 215, 214, 229, 54, 1},
},
{
name: "OnePageASHA256SameFile",
- data: bytes.Repeat([]byte{'a'}, usermem.PageSize),
+ data: bytes.Repeat([]byte{'a'}, hostarch.PageSize),
hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA256,
dataAndTreeInSameFile: true,
expectedHash: []byte{132, 54, 112, 142, 156, 19, 50, 140, 138, 240, 192, 154, 100, 120, 242, 69, 64, 217, 62, 166, 127, 88, 23, 197, 100, 66, 255, 215, 214, 229, 54, 1},
},
{
name: "OnePageASHA512SeparateFile",
- data: bytes.Repeat([]byte{'a'}, usermem.PageSize),
+ data: bytes.Repeat([]byte{'a'}, hostarch.PageSize),
hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA512,
dataAndTreeInSameFile: false,
expectedHash: []byte{165, 46, 176, 116, 47, 209, 101, 193, 64, 185, 30, 9, 52, 22, 24, 154, 135, 220, 232, 168, 215, 45, 222, 226, 207, 104, 160, 10, 156, 98, 245, 250, 76, 21, 68, 204, 65, 118, 69, 52, 210, 155, 36, 109, 233, 103, 1, 40, 218, 89, 125, 38, 247, 194, 2, 225, 119, 155, 65, 99, 182, 111, 110, 145},
},
{
name: "OnePageASHA512SameFile",
- data: bytes.Repeat([]byte{'a'}, usermem.PageSize),
+ data: bytes.Repeat([]byte{'a'}, hostarch.PageSize),
hashAlgorithms: linux.FS_VERITY_HASH_ALG_SHA512,
dataAndTreeInSameFile: true,
expectedHash: []byte{165, 46, 176, 116, 47, 209, 101, 193, 64, 185, 30, 9, 52, 22, 24, 154, 135, 220, 232, 168, 215, 45, 222, 226, 207, 104, 160, 10, 156, 98, 245, 250, 76, 21, 68, 204, 65, 118, 69, 52, 210, 155, 36, 109, 233, 103, 1, 40, 218, 89, 125, 38, 247, 194, 2, 225, 119, 155, 65, 99, 182, 111, 110, 145},
@@ -415,14 +416,14 @@ func TestVerifyInvalidRange(t *testing.T) {
// Verify range starts outside data range.
{
name: "StartOutsideRange",
- verifyStart: usermem.PageSize,
+ verifyStart: hostarch.PageSize,
verifySize: 1,
},
// Verify range ends outside data range.
{
name: "EndOutsideRange",
verifyStart: 0,
- verifySize: 2 * usermem.PageSize,
+ verifySize: 2 * hostarch.PageSize,
},
// Verify range with negative size.
{
@@ -434,7 +435,7 @@ func TestVerifyInvalidRange(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
var buf bytes.Buffer
- _, params := prepareVerify(t, usermem.PageSize /* dataSize */, defaultHashAlgorithm, false /* dataAndTreeInSameFile */, false /* isSymlink */, tc.verifyStart, tc.verifySize, &buf)
+ _, params := prepareVerify(t, hostarch.PageSize /* dataSize */, defaultHashAlgorithm, false /* dataAndTreeInSameFile */, false /* isSymlink */, tc.verifyStart, tc.verifySize, &buf)
if _, err := Verify(&params); errors.Is(err, nil) {
t.Errorf("Verification succeeded when expected to fail")
}
@@ -467,7 +468,7 @@ func TestVerifyUnmodifiedMetadata(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
var buf bytes.Buffer
- _, params := prepareVerify(t, usermem.PageSize /* dataSize */, defaultHashAlgorithm, tc.dataAndTreeInSameFile, tc.isSymlink, 0 /* verifyStart */, 0 /* verifySize */, &buf)
+ _, params := prepareVerify(t, hostarch.PageSize /* dataSize */, defaultHashAlgorithm, tc.dataAndTreeInSameFile, tc.isSymlink, 0 /* verifyStart */, 0 /* verifySize */, &buf)
if tc.isSymlink {
params.SymlinkTarget = defaultSymlinkPath
}
@@ -495,7 +496,7 @@ func TestVerifyModifiedName(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
var buf bytes.Buffer
- _, params := prepareVerify(t, usermem.PageSize /* dataSize */, defaultHashAlgorithm, tc.dataAndTreeInSameFile, false /* isSymlink */, 0 /* verifyStart */, 0 /* verifySize */, &buf)
+ _, params := prepareVerify(t, hostarch.PageSize /* dataSize */, defaultHashAlgorithm, tc.dataAndTreeInSameFile, false /* isSymlink */, 0 /* verifyStart */, 0 /* verifySize */, &buf)
params.Name += "abc"
if _, err := Verify(&params); errors.Is(err, nil) {
t.Errorf("Verification succeeded when expected to fail")
@@ -521,7 +522,7 @@ func TestVerifyModifiedSize(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
var buf bytes.Buffer
- _, params := prepareVerify(t, usermem.PageSize /* dataSize */, defaultHashAlgorithm, tc.dataAndTreeInSameFile, false /* isSymlink */, 0 /* verifyStart */, 0 /* verifySize */, &buf)
+ _, params := prepareVerify(t, hostarch.PageSize /* dataSize */, defaultHashAlgorithm, tc.dataAndTreeInSameFile, false /* isSymlink */, 0 /* verifyStart */, 0 /* verifySize */, &buf)
params.Size--
if _, err := Verify(&params); errors.Is(err, nil) {
t.Errorf("Verification succeeded when expected to fail")
@@ -547,7 +548,7 @@ func TestVerifyModifiedMode(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
var buf bytes.Buffer
- _, params := prepareVerify(t, usermem.PageSize /* dataSize */, defaultHashAlgorithm, tc.dataAndTreeInSameFile, false /* isSymlink */, 0 /* verifyStart */, 0 /* verifySize */, &buf)
+ _, params := prepareVerify(t, hostarch.PageSize /* dataSize */, defaultHashAlgorithm, tc.dataAndTreeInSameFile, false /* isSymlink */, 0 /* verifyStart */, 0 /* verifySize */, &buf)
params.Mode++
if _, err := Verify(&params); errors.Is(err, nil) {
t.Errorf("Verification succeeded when expected to fail")
@@ -573,7 +574,7 @@ func TestVerifyModifiedUID(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
var buf bytes.Buffer
- _, params := prepareVerify(t, usermem.PageSize /* dataSize */, defaultHashAlgorithm, tc.dataAndTreeInSameFile, false /* isSymlink */, 0 /* verifyStart */, 0 /* verifySize */, &buf)
+ _, params := prepareVerify(t, hostarch.PageSize /* dataSize */, defaultHashAlgorithm, tc.dataAndTreeInSameFile, false /* isSymlink */, 0 /* verifyStart */, 0 /* verifySize */, &buf)
params.UID++
if _, err := Verify(&params); errors.Is(err, nil) {
t.Errorf("Verification succeeded when expected to fail")
@@ -599,7 +600,7 @@ func TestVerifyModifiedGID(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
var buf bytes.Buffer
- _, params := prepareVerify(t, usermem.PageSize /* dataSize */, defaultHashAlgorithm, tc.dataAndTreeInSameFile, false /* isSymlink */, 0 /* verifyStart */, 0 /* verifySize */, &buf)
+ _, params := prepareVerify(t, hostarch.PageSize /* dataSize */, defaultHashAlgorithm, tc.dataAndTreeInSameFile, false /* isSymlink */, 0 /* verifyStart */, 0 /* verifySize */, &buf)
params.GID++
if _, err := Verify(&params); errors.Is(err, nil) {
t.Errorf("Verification succeeded when expected to fail")
@@ -625,7 +626,7 @@ func TestVerifyModifiedChildren(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
var buf bytes.Buffer
- _, params := prepareVerify(t, usermem.PageSize /* dataSize */, defaultHashAlgorithm, tc.dataAndTreeInSameFile, false /* isSymlink */, 0 /* verifyStart */, 0 /* verifySize */, &buf)
+ _, params := prepareVerify(t, hostarch.PageSize /* dataSize */, defaultHashAlgorithm, tc.dataAndTreeInSameFile, false /* isSymlink */, 0 /* verifyStart */, 0 /* verifySize */, &buf)
params.Children["abc"] = struct{}{}
if _, err := Verify(&params); errors.Is(err, nil) {
t.Errorf("Verification succeeded when expected to fail")
@@ -636,7 +637,7 @@ func TestVerifyModifiedChildren(t *testing.T) {
func TestVerifyModifiedSymlink(t *testing.T) {
var buf bytes.Buffer
- _, params := prepareVerify(t, usermem.PageSize /* dataSize */, defaultHashAlgorithm, false /* dataAndTreeInSameFile */, true /* isSymlink */, 0 /* verifyStart */, 0 /* verifySize */, &buf)
+ _, params := prepareVerify(t, hostarch.PageSize /* dataSize */, defaultHashAlgorithm, false /* dataAndTreeInSameFile */, true /* isSymlink */, 0 /* verifyStart */, 0 /* verifySize */, &buf)
params.SymlinkTarget = "merkle_modified_test_link"
if _, err := Verify(&params); err == nil {
t.Errorf("Verification succeeded when expected to fail")
@@ -652,30 +653,30 @@ func TestModifyOutsideVerifyRange(t *testing.T) {
}{
{
name: "BeforeRangeSeparateFile",
- modifyByte: 4*usermem.PageSize - 1,
+ modifyByte: 4*hostarch.PageSize - 1,
dataAndTreeInSameFile: false,
},
{
name: "BeforeRangeSameFile",
- modifyByte: 4*usermem.PageSize - 1,
+ modifyByte: 4*hostarch.PageSize - 1,
dataAndTreeInSameFile: true,
},
{
name: "AfterRangeSeparateFile",
- modifyByte: 5 * usermem.PageSize,
+ modifyByte: 5 * hostarch.PageSize,
dataAndTreeInSameFile: false,
},
{
name: "AfterRangeSameFile",
- modifyByte: 5 * usermem.PageSize,
+ modifyByte: 5 * hostarch.PageSize,
dataAndTreeInSameFile: true,
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
- dataSize := int64(8 * usermem.PageSize)
- verifyStart := int64(4 * usermem.PageSize)
- verifySize := int64(usermem.PageSize)
+ dataSize := int64(8 * hostarch.PageSize)
+ verifyStart := int64(4 * hostarch.PageSize)
+ verifySize := int64(hostarch.PageSize)
var buf bytes.Buffer
// Modified byte is outside verify range. Verify should succeed.
data, params := prepareVerify(t, dataSize, defaultHashAlgorithm, tc.dataAndTreeInSameFile, false /* isSymlink */, verifyStart, verifySize, &buf)
@@ -712,16 +713,16 @@ func TestModifyInsideVerifyRange(t *testing.T) {
// to fail.
{
name: "BlockAlignedRangeSeparateFile",
- verifyStart: 4 * usermem.PageSize,
- verifySize: usermem.PageSize,
- modifyByte: 4 * usermem.PageSize,
+ verifyStart: 4 * hostarch.PageSize,
+ verifySize: hostarch.PageSize,
+ modifyByte: 4 * hostarch.PageSize,
dataAndTreeInSameFile: false,
},
{
name: "BlockAlignedRangeSameFile",
- verifyStart: 4 * usermem.PageSize,
- verifySize: usermem.PageSize,
- modifyByte: 4 * usermem.PageSize,
+ verifyStart: 4 * hostarch.PageSize,
+ verifySize: hostarch.PageSize,
+ modifyByte: 4 * hostarch.PageSize,
dataAndTreeInSameFile: true,
},
// The tests below use a non-block-aligned verify range.
@@ -729,48 +730,48 @@ func TestModifyInsideVerifyRange(t *testing.T) {
// verify to fail.
{
name: "ModifyStartSeparateFile",
- verifyStart: 4*usermem.PageSize + 123,
- verifySize: 2 * usermem.PageSize,
- modifyByte: 4*usermem.PageSize + 123,
+ verifyStart: 4*hostarch.PageSize + 123,
+ verifySize: 2 * hostarch.PageSize,
+ modifyByte: 4*hostarch.PageSize + 123,
dataAndTreeInSameFile: false,
},
{
name: "ModifyStartSameFile",
- verifyStart: 4*usermem.PageSize + 123,
- verifySize: 2 * usermem.PageSize,
- modifyByte: 4*usermem.PageSize + 123,
+ verifyStart: 4*hostarch.PageSize + 123,
+ verifySize: 2 * hostarch.PageSize,
+ modifyByte: 4*hostarch.PageSize + 123,
dataAndTreeInSameFile: true,
},
// Modifying a byte at the end of verify range should cause
// verify to fail.
{
name: "ModifyEndSeparateFile",
- verifyStart: 4*usermem.PageSize + 123,
- verifySize: 2 * usermem.PageSize,
- modifyByte: 6*usermem.PageSize + 123,
+ verifyStart: 4*hostarch.PageSize + 123,
+ verifySize: 2 * hostarch.PageSize,
+ modifyByte: 6*hostarch.PageSize + 123,
dataAndTreeInSameFile: false,
},
{
name: "ModifyEndSameFile",
- verifyStart: 4*usermem.PageSize + 123,
- verifySize: 2 * usermem.PageSize,
- modifyByte: 6*usermem.PageSize + 123,
+ verifyStart: 4*hostarch.PageSize + 123,
+ verifySize: 2 * hostarch.PageSize,
+ modifyByte: 6*hostarch.PageSize + 123,
dataAndTreeInSameFile: true,
},
// Modifying a byte in the middle verified block should cause
// verify to fail.
{
name: "ModifyMiddleSeparateFile",
- verifyStart: 4*usermem.PageSize + 123,
- verifySize: 2 * usermem.PageSize,
- modifyByte: 5*usermem.PageSize + 123,
+ verifyStart: 4*hostarch.PageSize + 123,
+ verifySize: 2 * hostarch.PageSize,
+ modifyByte: 5*hostarch.PageSize + 123,
dataAndTreeInSameFile: false,
},
{
name: "ModifyMiddleSameFile",
- verifyStart: 4*usermem.PageSize + 123,
- verifySize: 2 * usermem.PageSize,
- modifyByte: 5*usermem.PageSize + 123,
+ verifyStart: 4*hostarch.PageSize + 123,
+ verifySize: 2 * hostarch.PageSize,
+ modifyByte: 5*hostarch.PageSize + 123,
dataAndTreeInSameFile: true,
},
// Modifying a byte in the first block in the verified range
@@ -778,16 +779,16 @@ func TestModifyInsideVerifyRange(t *testing.T) {
// out of verify range.
{
name: "ModifyFirstBlockSeparateFile",
- verifyStart: 4*usermem.PageSize + 123,
- verifySize: 2 * usermem.PageSize,
- modifyByte: 4*usermem.PageSize + 122,
+ verifyStart: 4*hostarch.PageSize + 123,
+ verifySize: 2 * hostarch.PageSize,
+ modifyByte: 4*hostarch.PageSize + 122,
dataAndTreeInSameFile: false,
},
{
name: "ModifyFirstBlockSameFile",
- verifyStart: 4*usermem.PageSize + 123,
- verifySize: 2 * usermem.PageSize,
- modifyByte: 4*usermem.PageSize + 122,
+ verifyStart: 4*hostarch.PageSize + 123,
+ verifySize: 2 * hostarch.PageSize,
+ modifyByte: 4*hostarch.PageSize + 122,
dataAndTreeInSameFile: true,
},
// Modifying a byte in the last block in the verified range
@@ -795,22 +796,22 @@ func TestModifyInsideVerifyRange(t *testing.T) {
// out of verify range.
{
name: "ModifyLastBlockSeparateFile",
- verifyStart: 4*usermem.PageSize + 123,
- verifySize: 2 * usermem.PageSize,
- modifyByte: 6*usermem.PageSize + 124,
+ verifyStart: 4*hostarch.PageSize + 123,
+ verifySize: 2 * hostarch.PageSize,
+ modifyByte: 6*hostarch.PageSize + 124,
dataAndTreeInSameFile: false,
},
{
name: "ModifyLastBlockSameFile",
- verifyStart: 4*usermem.PageSize + 123,
- verifySize: 2 * usermem.PageSize,
- modifyByte: 6*usermem.PageSize + 124,
+ verifyStart: 4*hostarch.PageSize + 123,
+ verifySize: 2 * hostarch.PageSize,
+ modifyByte: 6*hostarch.PageSize + 124,
dataAndTreeInSameFile: true,
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
- dataSize := int64(8 * usermem.PageSize)
+ dataSize := int64(8 * hostarch.PageSize)
var buf bytes.Buffer
data, params := prepareVerify(t, dataSize, defaultHashAlgorithm, tc.dataAndTreeInSameFile, false /* isSymlink */, tc.verifyStart, tc.verifySize, &buf)
// Flip a bit in data and checks Verify results.
@@ -854,7 +855,7 @@ func TestVerifyRandom(t *testing.T) {
rand.Seed(time.Now().UnixNano())
// Use a random dataSize. Minimum size 2 so that we can pick a random
// portion from it.
- dataSize := rand.Int63n(200*usermem.PageSize) + 2
+ dataSize := rand.Int63n(200*hostarch.PageSize) + 2
// Pick a random portion of data.
start := rand.Int63n(dataSize - 1)
diff --git a/pkg/ring0/BUILD b/pkg/ring0/BUILD
index 885958456..fda6ba601 100644
--- a/pkg/ring0/BUILD
+++ b/pkg/ring0/BUILD
@@ -77,10 +77,10 @@ go_library(
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/cpuid",
+ "//pkg/hostarch",
"//pkg/ring0/pagetables",
"//pkg/safecopy",
"//pkg/sentry/arch",
"//pkg/sentry/arch/fpu",
- "//pkg/usermem",
],
)
diff --git a/pkg/ring0/defs_amd64.go b/pkg/ring0/defs_amd64.go
index ceddf719d..76776c65c 100644
--- a/pkg/ring0/defs_amd64.go
+++ b/pkg/ring0/defs_amd64.go
@@ -17,7 +17,7 @@
package ring0
import (
- "gvisor.dev/gvisor/pkg/usermem"
+ "gvisor.dev/gvisor/pkg/hostarch"
)
var (
@@ -25,7 +25,7 @@ var (
UserspaceSize = uintptr(1) << (VirtualAddressBits() - 1)
// MaximumUserAddress is the largest possible user address.
- MaximumUserAddress = (UserspaceSize - 1) & ^uintptr(usermem.PageSize-1)
+ MaximumUserAddress = (UserspaceSize - 1) & ^uintptr(hostarch.PageSize-1)
// KernelStartAddress is the starting kernel address.
KernelStartAddress = ^uintptr(0) - (UserspaceSize - 1)
diff --git a/pkg/ring0/defs_arm64.go b/pkg/ring0/defs_arm64.go
index c372b02bb..0125690d2 100644
--- a/pkg/ring0/defs_arm64.go
+++ b/pkg/ring0/defs_arm64.go
@@ -17,7 +17,7 @@
package ring0
import (
- "gvisor.dev/gvisor/pkg/usermem"
+ "gvisor.dev/gvisor/pkg/hostarch"
)
var (
@@ -25,7 +25,7 @@ var (
UserspaceSize = uintptr(1) << (VirtualAddressBits())
// MaximumUserAddress is the largest possible user address.
- MaximumUserAddress = (UserspaceSize - 1) & ^uintptr(usermem.PageSize-1)
+ MaximumUserAddress = (UserspaceSize - 1) & ^uintptr(hostarch.PageSize-1)
// KernelStartAddress is the starting kernel address.
KernelStartAddress = ^uintptr(0) - (UserspaceSize - 1)
diff --git a/pkg/ring0/gen_offsets/BUILD b/pkg/ring0/gen_offsets/BUILD
index f421e1687..9ea8f9a4f 100644
--- a/pkg/ring0/gen_offsets/BUILD
+++ b/pkg/ring0/gen_offsets/BUILD
@@ -33,9 +33,9 @@ go_binary(
],
deps = [
"//pkg/cpuid",
+ "//pkg/hostarch",
"//pkg/ring0/pagetables",
"//pkg/sentry/arch",
"//pkg/sentry/arch/fpu",
- "//pkg/usermem",
],
)
diff --git a/pkg/ring0/kernel_amd64.go b/pkg/ring0/kernel_amd64.go
index 33c259757..92d2330cb 100644
--- a/pkg/ring0/kernel_amd64.go
+++ b/pkg/ring0/kernel_amd64.go
@@ -20,7 +20,7 @@ import (
"encoding/binary"
"reflect"
- "gvisor.dev/gvisor/pkg/usermem"
+ "gvisor.dev/gvisor/pkg/hostarch"
)
// init initializes architecture-specific state.
@@ -34,7 +34,7 @@ func (k *Kernel) init(maxCPUs int) {
entries = make([]kernelEntry, maxCPUs+padding-1)
totalSize := entrySize * uintptr(maxCPUs+padding-1)
addr := reflect.ValueOf(&entries[0]).Pointer()
- if addr&(usermem.PageSize-1) == 0 && totalSize >= usermem.PageSize {
+ if addr&(hostarch.PageSize-1) == 0 && totalSize >= hostarch.PageSize {
// The runtime forces power-of-2 alignment for allocations, and we are therefore
// safe once the first address is aligned and the chunk is at least a full page.
break
@@ -44,10 +44,10 @@ func (k *Kernel) init(maxCPUs int) {
k.cpuEntries = entries
k.globalIDT = &idt64{}
- if reflect.TypeOf(idt64{}).Size() != usermem.PageSize {
+ if reflect.TypeOf(idt64{}).Size() != hostarch.PageSize {
panic("Size of globalIDT should be PageSize")
}
- if reflect.ValueOf(k.globalIDT).Pointer()&(usermem.PageSize-1) != 0 {
+ if reflect.ValueOf(k.globalIDT).Pointer()&(hostarch.PageSize-1) != 0 {
panic("Allocated globalIDT should be page aligned")
}
@@ -71,13 +71,13 @@ func (k *Kernel) EntryRegions() map[uintptr]uintptr {
addr := reflect.ValueOf(&k.cpuEntries[0]).Pointer()
size := reflect.TypeOf(kernelEntry{}).Size() * uintptr(len(k.cpuEntries))
- end, _ := usermem.Addr(addr + size).RoundUp()
- regions[uintptr(usermem.Addr(addr).RoundDown())] = uintptr(end)
+ end, _ := hostarch.Addr(addr + size).RoundUp()
+ regions[uintptr(hostarch.Addr(addr).RoundDown())] = uintptr(end)
addr = reflect.ValueOf(k.globalIDT).Pointer()
size = reflect.TypeOf(idt64{}).Size()
- end, _ = usermem.Addr(addr + size).RoundUp()
- regions[uintptr(usermem.Addr(addr).RoundDown())] = uintptr(end)
+ end, _ = hostarch.Addr(addr + size).RoundUp()
+ regions[uintptr(hostarch.Addr(addr).RoundDown())] = uintptr(end)
return regions
}
diff --git a/pkg/ring0/lib_arm64.go b/pkg/ring0/lib_arm64.go
index f1c323bac..5eabd4296 100644
--- a/pkg/ring0/lib_arm64.go
+++ b/pkg/ring0/lib_arm64.go
@@ -65,9 +65,10 @@ func LoadFloatingPoint(*byte)
// SaveFloatingPoint saves floating point state.
func SaveFloatingPoint(*byte)
+// FPSIMDDisableTrap disables fpsimd.
func FPSIMDDisableTrap()
-// DisableVFP disables fpsimd.
+// FPSIMDEnableTrap enables fpsimd.
func FPSIMDEnableTrap()
// Init sets function pointers based on architectural features.
diff --git a/pkg/ring0/pagetables/BUILD b/pkg/ring0/pagetables/BUILD
index 65a978cbb..f8f160cc6 100644
--- a/pkg/ring0/pagetables/BUILD
+++ b/pkg/ring0/pagetables/BUILD
@@ -68,8 +68,8 @@ go_library(
"//pkg/sentry/platform/kvm:__subpackages__",
],
deps = [
+ "//pkg/hostarch",
"//pkg/sync",
- "//pkg/usermem",
],
)
@@ -84,5 +84,8 @@ go_test(
":walker_check_arm64",
],
library = ":pagetables",
- deps = ["//pkg/usermem"],
+ deps = [
+ "//pkg/hostarch",
+ "//pkg/usermem",
+ ],
)
diff --git a/pkg/ring0/pagetables/allocator_unsafe.go b/pkg/ring0/pagetables/allocator_unsafe.go
index d08bfdeb3..191d0942b 100644
--- a/pkg/ring0/pagetables/allocator_unsafe.go
+++ b/pkg/ring0/pagetables/allocator_unsafe.go
@@ -17,23 +17,23 @@ package pagetables
import (
"unsafe"
- "gvisor.dev/gvisor/pkg/usermem"
+ "gvisor.dev/gvisor/pkg/hostarch"
)
// newAlignedPTEs returns a set of aligned PTEs.
func newAlignedPTEs() *PTEs {
ptes := new(PTEs)
- offset := physicalFor(ptes) & (usermem.PageSize - 1)
+ offset := physicalFor(ptes) & (hostarch.PageSize - 1)
if offset == 0 {
// Already aligned.
return ptes
}
// Need to force an aligned allocation.
- unaligned := make([]byte, (2*usermem.PageSize)-1)
- offset = uintptr(unsafe.Pointer(&unaligned[0])) & (usermem.PageSize - 1)
+ unaligned := make([]byte, (2*hostarch.PageSize)-1)
+ offset = uintptr(unsafe.Pointer(&unaligned[0])) & (hostarch.PageSize - 1)
if offset != 0 {
- offset = usermem.PageSize - offset
+ offset = hostarch.PageSize - offset
}
return (*PTEs)(unsafe.Pointer(&unaligned[offset]))
}
diff --git a/pkg/ring0/pagetables/pagetables.go b/pkg/ring0/pagetables/pagetables.go
index 8c0a6aa82..3f17fba49 100644
--- a/pkg/ring0/pagetables/pagetables.go
+++ b/pkg/ring0/pagetables/pagetables.go
@@ -21,7 +21,7 @@
package pagetables
import (
- "gvisor.dev/gvisor/pkg/usermem"
+ "gvisor.dev/gvisor/pkg/hostarch"
)
// PageTables is a set of page tables.
@@ -142,7 +142,7 @@ func (*mapVisitor) requiresSplit() bool { return true }
//
// +checkescape:hard,stack
//go:nosplit
-func (p *PageTables) Map(addr usermem.Addr, length uintptr, opts MapOpts, physical uintptr) bool {
+func (p *PageTables) Map(addr hostarch.Addr, length uintptr, opts MapOpts, physical uintptr) bool {
if p.readOnlyShared {
panic("Should not modify read-only shared pagetables.")
}
@@ -198,7 +198,7 @@ func (v *unmapVisitor) visit(start uintptr, pte *PTE, align uintptr) bool {
//
// +checkescape:hard,stack
//go:nosplit
-func (p *PageTables) Unmap(addr usermem.Addr, length uintptr) bool {
+func (p *PageTables) Unmap(addr hostarch.Addr, length uintptr) bool {
if p.readOnlyShared {
panic("Should not modify read-only shared pagetables.")
}
@@ -249,7 +249,7 @@ func (v *emptyVisitor) visit(start uintptr, pte *PTE, align uintptr) bool {
//
// +checkescape:hard,stack
//go:nosplit
-func (p *PageTables) IsEmpty(addr usermem.Addr, length uintptr) bool {
+func (p *PageTables) IsEmpty(addr hostarch.Addr, length uintptr) bool {
w := emptyWalker{
pageTables: p,
}
@@ -298,9 +298,9 @@ func (*lookupVisitor) requiresSplit() bool { return false }
//
// +checkescape:hard,stack
//go:nosplit
-func (p *PageTables) Lookup(addr usermem.Addr, findFirst bool) (virtual usermem.Addr, physical, size uintptr, opts MapOpts) {
- mask := uintptr(usermem.PageSize - 1)
- addr &^= usermem.Addr(mask)
+func (p *PageTables) Lookup(addr hostarch.Addr, findFirst bool) (virtual hostarch.Addr, physical, size uintptr, opts MapOpts) {
+ mask := uintptr(hostarch.PageSize - 1)
+ addr &^= hostarch.Addr(mask)
w := lookupWalker{
pageTables: p,
visitor: lookupVisitor{
@@ -308,12 +308,12 @@ func (p *PageTables) Lookup(addr usermem.Addr, findFirst bool) (virtual usermem.
findFirst: findFirst,
},
}
- end := ^usermem.Addr(0) &^ usermem.Addr(mask)
+ end := ^hostarch.Addr(0) &^ hostarch.Addr(mask)
if !findFirst {
end = addr + 1
}
w.iterateRange(uintptr(addr), uintptr(end))
- return usermem.Addr(w.visitor.target), w.visitor.physical, w.visitor.size, w.visitor.opts
+ return hostarch.Addr(w.visitor.target), w.visitor.physical, w.visitor.size, w.visitor.opts
}
// MarkReadOnlyShared marks the pagetables read-only and can be shared.
diff --git a/pkg/ring0/pagetables/pagetables_aarch64.go b/pkg/ring0/pagetables/pagetables_aarch64.go
index 163a3aea3..86eb00a4f 100644
--- a/pkg/ring0/pagetables/pagetables_aarch64.go
+++ b/pkg/ring0/pagetables/pagetables_aarch64.go
@@ -19,7 +19,7 @@ package pagetables
import (
"sync/atomic"
- "gvisor.dev/gvisor/pkg/usermem"
+ "gvisor.dev/gvisor/pkg/hostarch"
)
// archPageTables is architecture-specific data.
@@ -85,7 +85,7 @@ const (
// MapOpts are x86 options.
type MapOpts struct {
// AccessType defines permissions.
- AccessType usermem.AccessType
+ AccessType hostarch.AccessType
// Global indicates the page is globally accessible.
Global bool
@@ -120,7 +120,7 @@ func (p *PTE) Opts() MapOpts {
v := atomic.LoadUintptr((*uintptr)(p))
return MapOpts{
- AccessType: usermem.AccessType{
+ AccessType: hostarch.AccessType{
Read: true,
Write: v&readOnly == 0,
Execute: v&xn == 0,
diff --git a/pkg/ring0/pagetables/pagetables_amd64_test.go b/pkg/ring0/pagetables/pagetables_amd64_test.go
index 54e8e554f..a13c616ae 100644
--- a/pkg/ring0/pagetables/pagetables_amd64_test.go
+++ b/pkg/ring0/pagetables/pagetables_amd64_test.go
@@ -19,19 +19,19 @@ package pagetables
import (
"testing"
- "gvisor.dev/gvisor/pkg/usermem"
+ "gvisor.dev/gvisor/pkg/hostarch"
)
func Test2MAnd4K(t *testing.T) {
pt := New(NewRuntimeAllocator())
// Map a small page and a huge page.
- pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42)
- pt.Map(0x00007f0000000000, pmdSize, MapOpts{AccessType: usermem.Read}, pmdSize*47)
+ pt.Map(0x400000, pteSize, MapOpts{AccessType: hostarch.ReadWrite}, pteSize*42)
+ pt.Map(0x00007f0000000000, pmdSize, MapOpts{AccessType: hostarch.Read}, pmdSize*47)
checkMappings(t, pt, []mapping{
- {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}},
- {0x00007f0000000000, pmdSize, pmdSize * 47, MapOpts{AccessType: usermem.Read}},
+ {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: hostarch.ReadWrite}},
+ {0x00007f0000000000, pmdSize, pmdSize * 47, MapOpts{AccessType: hostarch.Read}},
})
}
@@ -39,12 +39,12 @@ func Test1GAnd4K(t *testing.T) {
pt := New(NewRuntimeAllocator())
// Map a small page and a super page.
- pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42)
- pt.Map(0x00007f0000000000, pudSize, MapOpts{AccessType: usermem.Read}, pudSize*47)
+ pt.Map(0x400000, pteSize, MapOpts{AccessType: hostarch.ReadWrite}, pteSize*42)
+ pt.Map(0x00007f0000000000, pudSize, MapOpts{AccessType: hostarch.Read}, pudSize*47)
checkMappings(t, pt, []mapping{
- {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}},
- {0x00007f0000000000, pudSize, pudSize * 47, MapOpts{AccessType: usermem.Read}},
+ {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: hostarch.ReadWrite}},
+ {0x00007f0000000000, pudSize, pudSize * 47, MapOpts{AccessType: hostarch.Read}},
})
}
@@ -52,12 +52,12 @@ func TestSplit1GPage(t *testing.T) {
pt := New(NewRuntimeAllocator())
// Map a super page and knock out the middle.
- pt.Map(0x00007f0000000000, pudSize, MapOpts{AccessType: usermem.Read}, pudSize*42)
- pt.Unmap(usermem.Addr(0x00007f0000000000+pteSize), pudSize-(2*pteSize))
+ pt.Map(0x00007f0000000000, pudSize, MapOpts{AccessType: hostarch.Read}, pudSize*42)
+ pt.Unmap(hostarch.Addr(0x00007f0000000000+pteSize), pudSize-(2*pteSize))
checkMappings(t, pt, []mapping{
- {0x00007f0000000000, pteSize, pudSize * 42, MapOpts{AccessType: usermem.Read}},
- {0x00007f0000000000 + pudSize - pteSize, pteSize, pudSize*42 + pudSize - pteSize, MapOpts{AccessType: usermem.Read}},
+ {0x00007f0000000000, pteSize, pudSize * 42, MapOpts{AccessType: hostarch.Read}},
+ {0x00007f0000000000 + pudSize - pteSize, pteSize, pudSize*42 + pudSize - pteSize, MapOpts{AccessType: hostarch.Read}},
})
}
@@ -65,11 +65,11 @@ func TestSplit2MPage(t *testing.T) {
pt := New(NewRuntimeAllocator())
// Map a huge page and knock out the middle.
- pt.Map(0x00007f0000000000, pmdSize, MapOpts{AccessType: usermem.Read}, pmdSize*42)
- pt.Unmap(usermem.Addr(0x00007f0000000000+pteSize), pmdSize-(2*pteSize))
+ pt.Map(0x00007f0000000000, pmdSize, MapOpts{AccessType: hostarch.Read}, pmdSize*42)
+ pt.Unmap(hostarch.Addr(0x00007f0000000000+pteSize), pmdSize-(2*pteSize))
checkMappings(t, pt, []mapping{
- {0x00007f0000000000, pteSize, pmdSize * 42, MapOpts{AccessType: usermem.Read}},
- {0x00007f0000000000 + pmdSize - pteSize, pteSize, pmdSize*42 + pmdSize - pteSize, MapOpts{AccessType: usermem.Read}},
+ {0x00007f0000000000, pteSize, pmdSize * 42, MapOpts{AccessType: hostarch.Read}},
+ {0x00007f0000000000 + pmdSize - pteSize, pteSize, pmdSize*42 + pmdSize - pteSize, MapOpts{AccessType: hostarch.Read}},
})
}
diff --git a/pkg/ring0/pagetables/pagetables_arm64_test.go b/pkg/ring0/pagetables/pagetables_arm64_test.go
index 2f73d424f..69320c2fb 100644
--- a/pkg/ring0/pagetables/pagetables_arm64_test.go
+++ b/pkg/ring0/pagetables/pagetables_arm64_test.go
@@ -58,7 +58,7 @@ func TestSplit1GPage(t *testing.T) {
// Map a super page and knock out the middle.
pt.Map(0x0000ff0000000000, pudSize, MapOpts{AccessType: usermem.Read, User: true}, pudSize*42)
- pt.Unmap(usermem.Addr(0x0000ff0000000000+pteSize), pudSize-(2*pteSize))
+ pt.Unmap(hostarch.Addr(0x0000ff0000000000+pteSize), pudSize-(2*pteSize))
checkMappings(t, pt, []mapping{
{0x0000ff0000000000, pteSize, pudSize * 42, MapOpts{AccessType: usermem.Read, User: true}},
@@ -71,7 +71,7 @@ func TestSplit2MPage(t *testing.T) {
// Map a huge page and knock out the middle.
pt.Map(0x0000ff0000000000, pmdSize, MapOpts{AccessType: usermem.Read, User: true}, pmdSize*42)
- pt.Unmap(usermem.Addr(0x0000ff0000000000+pteSize), pmdSize-(2*pteSize))
+ pt.Unmap(hostarch.Addr(0x0000ff0000000000+pteSize), pmdSize-(2*pteSize))
checkMappings(t, pt, []mapping{
{0x0000ff0000000000, pteSize, pmdSize * 42, MapOpts{AccessType: usermem.Read, User: true}},
diff --git a/pkg/ring0/pagetables/pagetables_test.go b/pkg/ring0/pagetables/pagetables_test.go
index 772f4fc5e..df93dcb6a 100644
--- a/pkg/ring0/pagetables/pagetables_test.go
+++ b/pkg/ring0/pagetables/pagetables_test.go
@@ -15,9 +15,8 @@
package pagetables
import (
+ "gvisor.dev/gvisor/pkg/hostarch"
"testing"
-
- "gvisor.dev/gvisor/pkg/usermem"
)
type mapping struct {
@@ -90,7 +89,7 @@ func TestUnmap(t *testing.T) {
pt := New(NewRuntimeAllocator())
// Map and unmap one entry.
- pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42)
+ pt.Map(0x400000, pteSize, MapOpts{AccessType: hostarch.ReadWrite}, pteSize*42)
pt.Unmap(0x400000, pteSize)
checkMappings(t, pt, nil)
@@ -100,10 +99,10 @@ func TestReadOnly(t *testing.T) {
pt := New(NewRuntimeAllocator())
// Map one entry.
- pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.Read}, pteSize*42)
+ pt.Map(0x400000, pteSize, MapOpts{AccessType: hostarch.Read}, pteSize*42)
checkMappings(t, pt, []mapping{
- {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.Read}},
+ {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: hostarch.Read}},
})
}
@@ -111,10 +110,10 @@ func TestReadWrite(t *testing.T) {
pt := New(NewRuntimeAllocator())
// Map one entry.
- pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42)
+ pt.Map(0x400000, pteSize, MapOpts{AccessType: hostarch.ReadWrite}, pteSize*42)
checkMappings(t, pt, []mapping{
- {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}},
+ {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: hostarch.ReadWrite}},
})
}
@@ -122,12 +121,12 @@ func TestSerialEntries(t *testing.T) {
pt := New(NewRuntimeAllocator())
// Map two sequential entries.
- pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42)
- pt.Map(0x401000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*47)
+ pt.Map(0x400000, pteSize, MapOpts{AccessType: hostarch.ReadWrite}, pteSize*42)
+ pt.Map(0x401000, pteSize, MapOpts{AccessType: hostarch.ReadWrite}, pteSize*47)
checkMappings(t, pt, []mapping{
- {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}},
- {0x401000, pteSize, pteSize * 47, MapOpts{AccessType: usermem.ReadWrite}},
+ {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: hostarch.ReadWrite}},
+ {0x401000, pteSize, pteSize * 47, MapOpts{AccessType: hostarch.ReadWrite}},
})
}
@@ -135,11 +134,11 @@ func TestSpanningEntries(t *testing.T) {
pt := New(NewRuntimeAllocator())
// Span a pgd with two pages.
- pt.Map(0x00007efffffff000, 2*pteSize, MapOpts{AccessType: usermem.Read}, pteSize*42)
+ pt.Map(0x00007efffffff000, 2*pteSize, MapOpts{AccessType: hostarch.Read}, pteSize*42)
checkMappings(t, pt, []mapping{
- {0x00007efffffff000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.Read}},
- {0x00007f0000000000, pteSize, pteSize * 43, MapOpts{AccessType: usermem.Read}},
+ {0x00007efffffff000, pteSize, pteSize * 42, MapOpts{AccessType: hostarch.Read}},
+ {0x00007f0000000000, pteSize, pteSize * 43, MapOpts{AccessType: hostarch.Read}},
})
}
@@ -147,11 +146,11 @@ func TestSparseEntries(t *testing.T) {
pt := New(NewRuntimeAllocator())
// Map two entries in different pgds.
- pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42)
- pt.Map(0x00007f0000000000, pteSize, MapOpts{AccessType: usermem.Read}, pteSize*47)
+ pt.Map(0x400000, pteSize, MapOpts{AccessType: hostarch.ReadWrite}, pteSize*42)
+ pt.Map(0x00007f0000000000, pteSize, MapOpts{AccessType: hostarch.Read}, pteSize*47)
checkMappings(t, pt, []mapping{
- {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}},
- {0x00007f0000000000, pteSize, pteSize * 47, MapOpts{AccessType: usermem.Read}},
+ {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: hostarch.ReadWrite}},
+ {0x00007f0000000000, pteSize, pteSize * 47, MapOpts{AccessType: hostarch.Read}},
})
}
diff --git a/pkg/ring0/pagetables/pagetables_x86.go b/pkg/ring0/pagetables/pagetables_x86.go
index 32edd2f0a..e43698173 100644
--- a/pkg/ring0/pagetables/pagetables_x86.go
+++ b/pkg/ring0/pagetables/pagetables_x86.go
@@ -19,7 +19,7 @@ package pagetables
import (
"sync/atomic"
- "gvisor.dev/gvisor/pkg/usermem"
+ "gvisor.dev/gvisor/pkg/hostarch"
)
// archPageTables is architecture-specific data.
@@ -63,7 +63,7 @@ const (
// MapOpts are x86 options.
type MapOpts struct {
// AccessType defines permissions.
- AccessType usermem.AccessType
+ AccessType hostarch.AccessType
// Global indicates the page is globally accessible.
Global bool
@@ -97,7 +97,7 @@ func (p *PTE) Valid() bool {
func (p *PTE) Opts() MapOpts {
v := atomic.LoadUintptr((*uintptr)(p))
return MapOpts{
- AccessType: usermem.AccessType{
+ AccessType: hostarch.AccessType{
Read: v&present != 0,
Write: v&writable != 0,
Execute: v&executeDisable == 0,
diff --git a/pkg/safecopy/safecopy_test.go b/pkg/safecopy/safecopy_test.go
index d2ce8ff86..611f36253 100644
--- a/pkg/safecopy/safecopy_test.go
+++ b/pkg/safecopy/safecopy_test.go
@@ -27,7 +27,7 @@ import (
"golang.org/x/sys/unix"
)
-// Size of a page in bytes. Cloned from usermem.PageSize to avoid a circular
+// Size of a page in bytes. Cloned from hostarch.PageSize to avoid a circular
// dependency.
const pageSize = 4096
diff --git a/pkg/seccomp/BUILD b/pkg/seccomp/BUILD
index 201dd072f..169fc0ac3 100644
--- a/pkg/seccomp/BUILD
+++ b/pkg/seccomp/BUILD
@@ -54,6 +54,6 @@ go_test(
deps = [
"//pkg/abi/linux",
"//pkg/bpf",
- "//pkg/usermem",
+ "//pkg/hostarch",
],
)
diff --git a/pkg/seccomp/seccomp_test.go b/pkg/seccomp/seccomp_test.go
index db06d1f1b..68feddf31 100644
--- a/pkg/seccomp/seccomp_test.go
+++ b/pkg/seccomp/seccomp_test.go
@@ -29,7 +29,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/bpf"
- "gvisor.dev/gvisor/pkg/usermem"
+ "gvisor.dev/gvisor/pkg/hostarch"
)
// newVictim makes a victim binary.
@@ -57,7 +57,7 @@ func dataAsInput(d *linux.SeccompData) bpf.Input {
d.MarshalUnsafe(buf)
return bpf.InputBytes{
Data: buf,
- Order: usermem.ByteOrder,
+ Order: hostarch.ByteOrder,
}
}
diff --git a/pkg/sentry/arch/BUILD b/pkg/sentry/arch/BUILD
index f660f1614..c9c52530d 100644
--- a/pkg/sentry/arch/BUILD
+++ b/pkg/sentry/arch/BUILD
@@ -32,6 +32,7 @@ go_library(
"//pkg/abi/linux",
"//pkg/context",
"//pkg/cpuid",
+ "//pkg/hostarch",
"//pkg/log",
"//pkg/marshal",
"//pkg/marshal/primitive",
diff --git a/pkg/sentry/arch/arch.go b/pkg/sentry/arch/arch.go
index 921151137..290863ee6 100644
--- a/pkg/sentry/arch/arch.go
+++ b/pkg/sentry/arch/arch.go
@@ -22,11 +22,11 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/cpuid"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/marshal"
"gvisor.dev/gvisor/pkg/sentry/arch/fpu"
"gvisor.dev/gvisor/pkg/sentry/limits"
- "gvisor.dev/gvisor/pkg/usermem"
)
// Arch describes an architecture.
@@ -188,11 +188,11 @@ type Context interface {
// returned layout must be no lower than min, and MaxAddr for the returned
// layout must be no higher than max. Repeated calls to NewMmapLayout may
// return different layouts.
- NewMmapLayout(min, max usermem.Addr, limits *limits.LimitSet) (MmapLayout, error)
+ NewMmapLayout(min, max hostarch.Addr, limits *limits.LimitSet) (MmapLayout, error)
// PIELoadAddress returns a preferred load address for a
// position-independent executable within l.
- PIELoadAddress(l MmapLayout) usermem.Addr
+ PIELoadAddress(l MmapLayout) hostarch.Addr
// FeatureSet returns the FeatureSet in use in this context.
FeatureSet() *cpuid.FeatureSet
@@ -257,18 +257,18 @@ const (
// +stateify savable
type MmapLayout struct {
// MinAddr is the lowest mappable address.
- MinAddr usermem.Addr
+ MinAddr hostarch.Addr
// MaxAddr is the highest mappable address.
- MaxAddr usermem.Addr
+ MaxAddr hostarch.Addr
// BottomUpBase is the lowest address that may be returned for a
// MmapBottomUp mmap.
- BottomUpBase usermem.Addr
+ BottomUpBase hostarch.Addr
// TopDownBase is the highest address that may be returned for a
// MmapTopDown mmap.
- TopDownBase usermem.Addr
+ TopDownBase hostarch.Addr
// DefaultDirection is the direction for most non-fixed mmaps in this
// layout.
@@ -316,9 +316,9 @@ type SyscallArgument struct {
// SyscallArguments represents the set of arguments passed to a syscall.
type SyscallArguments [6]SyscallArgument
-// Pointer returns the usermem.Addr representation of a pointer argument.
-func (a SyscallArgument) Pointer() usermem.Addr {
- return usermem.Addr(a.Value)
+// Pointer returns the hostarch.Addr representation of a pointer argument.
+func (a SyscallArgument) Pointer() hostarch.Addr {
+ return hostarch.Addr(a.Value)
}
// Int returns the int32 representation of a 32-bit signed integer argument.
diff --git a/pkg/sentry/arch/arch_amd64.go b/pkg/sentry/arch/arch_amd64.go
index 2571be60f..d6b4d2357 100644
--- a/pkg/sentry/arch/arch_amd64.go
+++ b/pkg/sentry/arch/arch_amd64.go
@@ -23,11 +23,11 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/cpuid"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/marshal"
"gvisor.dev/gvisor/pkg/marshal/primitive"
"gvisor.dev/gvisor/pkg/sentry/arch/fpu"
"gvisor.dev/gvisor/pkg/sentry/limits"
- "gvisor.dev/gvisor/pkg/usermem"
)
// Host specifies the host architecture.
@@ -37,7 +37,7 @@ const Host = AMD64
const (
// maxAddr64 is the maximum userspace address. It is TASK_SIZE in Linux
// for a 64-bit process.
- maxAddr64 usermem.Addr = (1 << 47) - usermem.PageSize
+ maxAddr64 hostarch.Addr = (1 << 47) - hostarch.PageSize
// maxStackRand64 is the maximum randomization to apply to the stack.
// It is defined by arch/x86/mm/mmap.c:stack_maxrandom_size in Linux.
@@ -45,7 +45,7 @@ const (
// maxMmapRand64 is the maximum randomization to apply to the mmap
// layout. It is defined by arch/x86/mm/mmap.c:arch_mmap_rnd in Linux.
- maxMmapRand64 = (1 << 28) * usermem.PageSize
+ maxMmapRand64 = (1 << 28) * hostarch.PageSize
// minGap64 is the minimum gap to leave at the top of the address space
// for the stack. It is defined by arch/x86/mm/mmap.c:MIN_GAP in Linux.
@@ -56,7 +56,7 @@ const (
//
// The Platform {Min,Max}UserAddress() may preclude loading at this
// address. See other preferredFoo comments below.
- preferredPIELoadAddr usermem.Addr = maxAddr64 / 3 * 2
+ preferredPIELoadAddr hostarch.Addr = maxAddr64 / 3 * 2
)
// These constants are selected as heuristics to help make the Platform's
@@ -92,13 +92,13 @@ const (
// This is all "preferred" because the layout min/max address may not
// allow us to select such a TopDownBase, in which case we have to fall
// back to a layout that TSAN may not be happy with.
- preferredTopDownAllocMin usermem.Addr = 0x7e8000000000
- preferredAllocationGap = 128 << 30 // 128 GB
- preferredTopDownBaseMin = preferredTopDownAllocMin + preferredAllocationGap
+ preferredTopDownAllocMin hostarch.Addr = 0x7e8000000000
+ preferredAllocationGap = 128 << 30 // 128 GB
+ preferredTopDownBaseMin = preferredTopDownAllocMin + preferredAllocationGap
// minMmapRand64 is the smallest we are willing to make the
// randomization to stay above preferredTopDownBaseMin.
- minMmapRand64 = (1 << 26) * usermem.PageSize
+ minMmapRand64 = (1 << 26) * hostarch.PageSize
)
// context64 represents an AMD64 context.
@@ -207,12 +207,12 @@ func (c *context64) FeatureSet() *cpuid.FeatureSet {
}
// mmapRand returns a random adjustment for randomizing an mmap layout.
-func mmapRand(max uint64) usermem.Addr {
- return usermem.Addr(rand.Int63n(int64(max))).RoundDown()
+func mmapRand(max uint64) hostarch.Addr {
+ return hostarch.Addr(rand.Int63n(int64(max))).RoundDown()
}
// NewMmapLayout implements Context.NewMmapLayout consistently with Linux.
-func (c *context64) NewMmapLayout(min, max usermem.Addr, r *limits.LimitSet) (MmapLayout, error) {
+func (c *context64) NewMmapLayout(min, max hostarch.Addr, r *limits.LimitSet) (MmapLayout, error) {
min, ok := min.RoundUp()
if !ok {
return MmapLayout{}, unix.EINVAL
@@ -230,7 +230,7 @@ func (c *context64) NewMmapLayout(min, max usermem.Addr, r *limits.LimitSet) (Mm
// MAX_GAP in Linux.
maxGap := (max / 6) * 5
- gap := usermem.Addr(stackSize.Cur)
+ gap := hostarch.Addr(stackSize.Cur)
if gap < minGap64 {
gap = minGap64
}
@@ -243,7 +243,7 @@ func (c *context64) NewMmapLayout(min, max usermem.Addr, r *limits.LimitSet) (Mm
}
topDownMin := max - gap - maxMmapRand64
- maxRand := usermem.Addr(maxMmapRand64)
+ maxRand := hostarch.Addr(maxMmapRand64)
if topDownMin < preferredTopDownBaseMin {
// Try to keep TopDownBase above preferredTopDownBaseMin by
// shrinking maxRand.
@@ -278,7 +278,7 @@ func (c *context64) NewMmapLayout(min, max usermem.Addr, r *limits.LimitSet) (Mm
}
// PIELoadAddress implements Context.PIELoadAddress.
-func (c *context64) PIELoadAddress(l MmapLayout) usermem.Addr {
+func (c *context64) PIELoadAddress(l MmapLayout) hostarch.Addr {
base := preferredPIELoadAddr
max, ok := base.AddLength(maxMmapRand64)
if !ok {
@@ -311,7 +311,7 @@ func (c *context64) PtracePeekUser(addr uintptr) (marshal.Marshallable, error) {
regs := c.ptraceGetRegs()
buf := make([]byte, regs.SizeBytes())
regs.MarshalUnsafe(buf)
- return c.Native(uintptr(usermem.ByteOrder.Uint64(buf[addr:]))), nil
+ return c.Native(uintptr(hostarch.ByteOrder.Uint64(buf[addr:]))), nil
}
// Note: x86 debug registers are missing.
return c.Native(0), nil
@@ -326,7 +326,7 @@ func (c *context64) PtracePokeUser(addr, data uintptr) error {
regs := c.ptraceGetRegs()
buf := make([]byte, regs.SizeBytes())
regs.MarshalUnsafe(buf)
- usermem.ByteOrder.PutUint64(buf[addr:], uint64(data))
+ hostarch.ByteOrder.PutUint64(buf[addr:], uint64(data))
_, err := c.PtraceSetRegs(bytes.NewBuffer(buf))
return err
}
diff --git a/pkg/sentry/arch/arch_arm64.go b/pkg/sentry/arch/arch_arm64.go
index 14ad9483b..348f238fd 100644
--- a/pkg/sentry/arch/arch_arm64.go
+++ b/pkg/sentry/arch/arch_arm64.go
@@ -22,11 +22,11 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/cpuid"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/marshal"
"gvisor.dev/gvisor/pkg/marshal/primitive"
"gvisor.dev/gvisor/pkg/sentry/arch/fpu"
"gvisor.dev/gvisor/pkg/sentry/limits"
- "gvisor.dev/gvisor/pkg/usermem"
)
// Host specifies the host architecture.
@@ -36,7 +36,7 @@ const Host = ARM64
const (
// maxAddr64 is the maximum userspace address. It is TASK_SIZE in Linux
// for a 64-bit process.
- maxAddr64 usermem.Addr = (1 << 48)
+ maxAddr64 hostarch.Addr = (1 << 48)
// maxStackRand64 is the maximum randomization to apply to the stack.
// It is defined by arch/arm64/mm/mmap.c:(STACK_RND_MASK << PAGE_SHIFT) in Linux.
@@ -44,7 +44,7 @@ const (
// maxMmapRand64 is the maximum randomization to apply to the mmap
// layout. It is defined by arch/arm64/mm/mmap.c:arch_mmap_rnd in Linux.
- maxMmapRand64 = (1 << 33) * usermem.PageSize
+ maxMmapRand64 = (1 << 33) * hostarch.PageSize
// minGap64 is the minimum gap to leave at the top of the address space
// for the stack. It is defined by arch/arm64/mm/mmap.c:MIN_GAP in Linux.
@@ -55,7 +55,7 @@ const (
//
// The Platform {Min,Max}UserAddress() may preclude loading at this
// address. See other preferredFoo comments below.
- preferredPIELoadAddr usermem.Addr = maxAddr64 / 6 * 5
+ preferredPIELoadAddr hostarch.Addr = maxAddr64 / 6 * 5
)
var (
@@ -66,13 +66,13 @@ var (
// These constants are selected as heuristics to help make the Platform's
// potentially limited address space conform as closely to Linux as possible.
const (
- preferredTopDownAllocMin usermem.Addr = 0x7e8000000000
- preferredAllocationGap = 128 << 30 // 128 GB
- preferredTopDownBaseMin = preferredTopDownAllocMin + preferredAllocationGap
+ preferredTopDownAllocMin hostarch.Addr = 0x7e8000000000
+ preferredAllocationGap = 128 << 30 // 128 GB
+ preferredTopDownBaseMin = preferredTopDownAllocMin + preferredAllocationGap
// minMmapRand64 is the smallest we are willing to make the
// randomization to stay above preferredTopDownBaseMin.
- minMmapRand64 = (1 << 18) * usermem.PageSize
+ minMmapRand64 = (1 << 18) * hostarch.PageSize
)
// context64 represents an ARM64 context.
@@ -187,12 +187,12 @@ func (c *context64) FeatureSet() *cpuid.FeatureSet {
}
// mmapRand returns a random adjustment for randomizing an mmap layout.
-func mmapRand(max uint64) usermem.Addr {
- return usermem.Addr(rand.Int63n(int64(max))).RoundDown()
+func mmapRand(max uint64) hostarch.Addr {
+ return hostarch.Addr(rand.Int63n(int64(max))).RoundDown()
}
// NewMmapLayout implements Context.NewMmapLayout consistently with Linux.
-func (c *context64) NewMmapLayout(min, max usermem.Addr, r *limits.LimitSet) (MmapLayout, error) {
+func (c *context64) NewMmapLayout(min, max hostarch.Addr, r *limits.LimitSet) (MmapLayout, error) {
min, ok := min.RoundUp()
if !ok {
return MmapLayout{}, unix.EINVAL
@@ -210,7 +210,7 @@ func (c *context64) NewMmapLayout(min, max usermem.Addr, r *limits.LimitSet) (Mm
// MAX_GAP in Linux.
maxGap := (max / 6) * 5
- gap := usermem.Addr(stackSize.Cur)
+ gap := hostarch.Addr(stackSize.Cur)
if gap < minGap64 {
gap = minGap64
}
@@ -223,7 +223,7 @@ func (c *context64) NewMmapLayout(min, max usermem.Addr, r *limits.LimitSet) (Mm
}
topDownMin := max - gap - maxMmapRand64
- maxRand := usermem.Addr(maxMmapRand64)
+ maxRand := hostarch.Addr(maxMmapRand64)
if topDownMin < preferredTopDownBaseMin {
// Try to keep TopDownBase above preferredTopDownBaseMin by
// shrinking maxRand.
@@ -258,7 +258,7 @@ func (c *context64) NewMmapLayout(min, max usermem.Addr, r *limits.LimitSet) (Mm
}
// PIELoadAddress implements Context.PIELoadAddress.
-func (c *context64) PIELoadAddress(l MmapLayout) usermem.Addr {
+func (c *context64) PIELoadAddress(l MmapLayout) hostarch.Addr {
base := preferredPIELoadAddr
max, ok := base.AddLength(maxMmapRand64)
if !ok {
diff --git a/pkg/sentry/arch/auxv.go b/pkg/sentry/arch/auxv.go
index 2b4c8f3fc..19ca18121 100644
--- a/pkg/sentry/arch/auxv.go
+++ b/pkg/sentry/arch/auxv.go
@@ -15,7 +15,7 @@
package arch
import (
- "gvisor.dev/gvisor/pkg/usermem"
+ "gvisor.dev/gvisor/pkg/hostarch"
)
// An AuxEntry represents an entry in an ELF auxiliary vector.
@@ -23,7 +23,7 @@ import (
// +stateify savable
type AuxEntry struct {
Key uint64
- Value usermem.Addr
+ Value hostarch.Addr
}
// An Auxv represents an ELF auxiliary vector.
diff --git a/pkg/sentry/arch/fpu/BUILD b/pkg/sentry/arch/fpu/BUILD
index 0a5395267..4e4f20639 100644
--- a/pkg/sentry/arch/fpu/BUILD
+++ b/pkg/sentry/arch/fpu/BUILD
@@ -13,9 +13,9 @@ go_library(
visibility = ["//:sandbox"],
deps = [
"//pkg/cpuid",
+ "//pkg/hostarch",
"//pkg/sync",
"//pkg/syserror",
- "//pkg/usermem",
"@org_golang_x_sys//unix:go_default_library",
],
)
diff --git a/pkg/sentry/arch/fpu/fpu_amd64.go b/pkg/sentry/arch/fpu/fpu_amd64.go
index 3a62f51be..1e9625bee 100644
--- a/pkg/sentry/arch/fpu/fpu_amd64.go
+++ b/pkg/sentry/arch/fpu/fpu_amd64.go
@@ -21,9 +21,9 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/cpuid"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
// initX86FPState (defined in asm files) sets up initial state.
@@ -146,11 +146,11 @@ const (
// any of the reserved bits of the MXCSR register." - Intel SDM Vol. 1, Section
// 10.5.1.2 "SSE State")
func sanitizeMXCSR(f State) {
- mxcsr := usermem.ByteOrder.Uint32(f[mxcsrOffset:])
+ mxcsr := hostarch.ByteOrder.Uint32(f[mxcsrOffset:])
initMXCSRMask.Do(func() {
temp := State(alignedBytes(uint(ptraceFPRegsSize), 16))
initX86FPState(&temp[0], false /* useXsave */)
- mxcsrMask = usermem.ByteOrder.Uint32(temp[mxcsrMaskOffset:])
+ mxcsrMask = hostarch.ByteOrder.Uint32(temp[mxcsrMaskOffset:])
if mxcsrMask == 0 {
// "If the value of the MXCSR_MASK field is 00000000H, then the
// MXCSR_MASK value is the default value of 0000FFBFH." - Intel SDM
@@ -160,7 +160,7 @@ func sanitizeMXCSR(f State) {
}
})
mxcsr &= mxcsrMask
- usermem.ByteOrder.PutUint32(f[mxcsrOffset:], mxcsr)
+ hostarch.ByteOrder.PutUint32(f[mxcsrOffset:], mxcsr)
}
// PtraceGetXstateRegs implements ptrace(PTRACE_GETREGS, NT_X86_XSTATE) by
@@ -177,7 +177,7 @@ func (s *State) PtraceGetXstateRegs(dst io.Writer, maxlen int, featureSet *cpuid
// Area". Linux uses the first 8 bytes of this area to store the OS XSTATE
// mask. GDB relies on this: see
// gdb/x86-linux-nat.c:x86_linux_read_description().
- usermem.ByteOrder.PutUint64(f[userXstateXCR0Offset:], featureSet.ValidXCR0Mask())
+ hostarch.ByteOrder.PutUint64(f[userXstateXCR0Offset:], featureSet.ValidXCR0Mask())
if len(f) > maxlen {
f = f[:maxlen]
}
@@ -208,9 +208,9 @@ func (s *State) PtraceSetXstateRegs(src io.Reader, maxlen int, featureSet *cpuid
// Force reserved bits in MXCSR to 0. This is consistent with Linux.
sanitizeMXCSR(State(f))
// Users can't enable *more* XCR0 bits than what we, and the CPU, support.
- xstateBV := usermem.ByteOrder.Uint64(f[xstateBVOffset:])
+ xstateBV := hostarch.ByteOrder.Uint64(f[xstateBVOffset:])
xstateBV &= featureSet.ValidXCR0Mask()
- usermem.ByteOrder.PutUint64(f[xstateBVOffset:], xstateBV)
+ hostarch.ByteOrder.PutUint64(f[xstateBVOffset:], xstateBV)
// Force XCOMP_BV and reserved bytes in the XSAVE header to 0.
reserved := f[xsaveHeaderZeroedOffset : xsaveHeaderZeroedOffset+xsaveHeaderZeroedBytes]
for i := range reserved {
@@ -266,7 +266,7 @@ func (s *State) AfterLoad() {
// What was in use?
savedBV := fxsaveBV
if len(old) >= xstateBVOffset+8 {
- savedBV = usermem.ByteOrder.Uint64(old[xstateBVOffset:])
+ savedBV = hostarch.ByteOrder.Uint64(old[xstateBVOffset:])
}
// Supported features must be a superset of saved features.
diff --git a/pkg/sentry/arch/fpu/fpu_arm64.go b/pkg/sentry/arch/fpu/fpu_arm64.go
index d2f62631d..46634661f 100644
--- a/pkg/sentry/arch/fpu/fpu_arm64.go
+++ b/pkg/sentry/arch/fpu/fpu_arm64.go
@@ -58,6 +58,8 @@ func (s *State) Fork() State {
}
// BytePointer returns a pointer to the first byte of the state.
+//
+//go:nosplit
func (s *State) BytePointer() *byte {
return &(*s)[0]
}
diff --git a/pkg/sentry/arch/signal.go b/pkg/sentry/arch/signal.go
index 35d2e07c3..67d7edf68 100644
--- a/pkg/sentry/arch/signal.go
+++ b/pkg/sentry/arch/signal.go
@@ -16,7 +16,7 @@ package arch
import (
"gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/usermem"
+ "gvisor.dev/gvisor/pkg/hostarch"
)
// SignalAct represents the action that should be taken when a signal is
@@ -154,107 +154,107 @@ func (s *SignalInfo) FixSignalCodeForUser() {
// PID returns the si_pid field.
func (s *SignalInfo) PID() int32 {
- return int32(usermem.ByteOrder.Uint32(s.Fields[0:4]))
+ return int32(hostarch.ByteOrder.Uint32(s.Fields[0:4]))
}
// SetPID mutates the si_pid field.
func (s *SignalInfo) SetPID(val int32) {
- usermem.ByteOrder.PutUint32(s.Fields[0:4], uint32(val))
+ hostarch.ByteOrder.PutUint32(s.Fields[0:4], uint32(val))
}
// UID returns the si_uid field.
func (s *SignalInfo) UID() int32 {
- return int32(usermem.ByteOrder.Uint32(s.Fields[4:8]))
+ return int32(hostarch.ByteOrder.Uint32(s.Fields[4:8]))
}
// SetUID mutates the si_uid field.
func (s *SignalInfo) SetUID(val int32) {
- usermem.ByteOrder.PutUint32(s.Fields[4:8], uint32(val))
+ hostarch.ByteOrder.PutUint32(s.Fields[4:8], uint32(val))
}
// Sigval returns the sigval field, which is aliased to both si_int and si_ptr.
func (s *SignalInfo) Sigval() uint64 {
- return usermem.ByteOrder.Uint64(s.Fields[8:16])
+ return hostarch.ByteOrder.Uint64(s.Fields[8:16])
}
// SetSigval mutates the sigval field.
func (s *SignalInfo) SetSigval(val uint64) {
- usermem.ByteOrder.PutUint64(s.Fields[8:16], val)
+ hostarch.ByteOrder.PutUint64(s.Fields[8:16], val)
}
// TimerID returns the si_timerid field.
func (s *SignalInfo) TimerID() linux.TimerID {
- return linux.TimerID(usermem.ByteOrder.Uint32(s.Fields[0:4]))
+ return linux.TimerID(hostarch.ByteOrder.Uint32(s.Fields[0:4]))
}
// SetTimerID sets the si_timerid field.
func (s *SignalInfo) SetTimerID(val linux.TimerID) {
- usermem.ByteOrder.PutUint32(s.Fields[0:4], uint32(val))
+ hostarch.ByteOrder.PutUint32(s.Fields[0:4], uint32(val))
}
// Overrun returns the si_overrun field.
func (s *SignalInfo) Overrun() int32 {
- return int32(usermem.ByteOrder.Uint32(s.Fields[4:8]))
+ return int32(hostarch.ByteOrder.Uint32(s.Fields[4:8]))
}
// SetOverrun sets the si_overrun field.
func (s *SignalInfo) SetOverrun(val int32) {
- usermem.ByteOrder.PutUint32(s.Fields[4:8], uint32(val))
+ hostarch.ByteOrder.PutUint32(s.Fields[4:8], uint32(val))
}
// Addr returns the si_addr field.
func (s *SignalInfo) Addr() uint64 {
- return usermem.ByteOrder.Uint64(s.Fields[0:8])
+ return hostarch.ByteOrder.Uint64(s.Fields[0:8])
}
// SetAddr sets the si_addr field.
func (s *SignalInfo) SetAddr(val uint64) {
- usermem.ByteOrder.PutUint64(s.Fields[0:8], val)
+ hostarch.ByteOrder.PutUint64(s.Fields[0:8], val)
}
// Status returns the si_status field.
func (s *SignalInfo) Status() int32 {
- return int32(usermem.ByteOrder.Uint32(s.Fields[8:12]))
+ return int32(hostarch.ByteOrder.Uint32(s.Fields[8:12]))
}
// SetStatus mutates the si_status field.
func (s *SignalInfo) SetStatus(val int32) {
- usermem.ByteOrder.PutUint32(s.Fields[8:12], uint32(val))
+ hostarch.ByteOrder.PutUint32(s.Fields[8:12], uint32(val))
}
// CallAddr returns the si_call_addr field.
func (s *SignalInfo) CallAddr() uint64 {
- return usermem.ByteOrder.Uint64(s.Fields[0:8])
+ return hostarch.ByteOrder.Uint64(s.Fields[0:8])
}
// SetCallAddr mutates the si_call_addr field.
func (s *SignalInfo) SetCallAddr(val uint64) {
- usermem.ByteOrder.PutUint64(s.Fields[0:8], val)
+ hostarch.ByteOrder.PutUint64(s.Fields[0:8], val)
}
// Syscall returns the si_syscall field.
func (s *SignalInfo) Syscall() int32 {
- return int32(usermem.ByteOrder.Uint32(s.Fields[8:12]))
+ return int32(hostarch.ByteOrder.Uint32(s.Fields[8:12]))
}
// SetSyscall mutates the si_syscall field.
func (s *SignalInfo) SetSyscall(val int32) {
- usermem.ByteOrder.PutUint32(s.Fields[8:12], uint32(val))
+ hostarch.ByteOrder.PutUint32(s.Fields[8:12], uint32(val))
}
// Arch returns the si_arch field.
func (s *SignalInfo) Arch() uint32 {
- return usermem.ByteOrder.Uint32(s.Fields[12:16])
+ return hostarch.ByteOrder.Uint32(s.Fields[12:16])
}
// SetArch mutates the si_arch field.
func (s *SignalInfo) SetArch(val uint32) {
- usermem.ByteOrder.PutUint32(s.Fields[12:16], val)
+ hostarch.ByteOrder.PutUint32(s.Fields[12:16], val)
}
// Band returns the si_band field.
func (s *SignalInfo) Band() int64 {
- return int64(usermem.ByteOrder.Uint64(s.Fields[0:8]))
+ return int64(hostarch.ByteOrder.Uint64(s.Fields[0:8]))
}
// SetBand mutates the si_band field.
@@ -262,15 +262,15 @@ func (s *SignalInfo) SetBand(val int64) {
// Note: this assumes the platform uses `long` as `__ARCH_SI_BAND_T`.
// On some platforms, which gVisor doesn't support, `__ARCH_SI_BAND_T` is
// `int`. See siginfo.h.
- usermem.ByteOrder.PutUint64(s.Fields[0:8], uint64(val))
+ hostarch.ByteOrder.PutUint64(s.Fields[0:8], uint64(val))
}
// FD returns the si_fd field.
func (s *SignalInfo) FD() uint32 {
- return usermem.ByteOrder.Uint32(s.Fields[8:12])
+ return hostarch.ByteOrder.Uint32(s.Fields[8:12])
}
// SetFD mutates the si_fd field.
func (s *SignalInfo) SetFD(val uint32) {
- usermem.ByteOrder.PutUint32(s.Fields[8:12], val)
+ hostarch.ByteOrder.PutUint32(s.Fields[8:12], val)
}
diff --git a/pkg/sentry/arch/signal_amd64.go b/pkg/sentry/arch/signal_amd64.go
index ee3743483..082ed92b1 100644
--- a/pkg/sentry/arch/signal_amd64.go
+++ b/pkg/sentry/arch/signal_amd64.go
@@ -21,10 +21,10 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/marshal/primitive"
"gvisor.dev/gvisor/pkg/sentry/arch/fpu"
- "gvisor.dev/gvisor/pkg/usermem"
)
// SignalContext64 is equivalent to struct sigcontext, the type passed as the
@@ -133,7 +133,7 @@ func (c *context64) SignalSetup(st *Stack, act *SignalAct, info *SignalInfo, alt
// space on the user stack naturally caps the amount of memory the
// sentry will allocate for this purpose.
fpSize, _ := c.fpuFrameSize()
- sp = (sp - usermem.Addr(fpSize)) & ^usermem.Addr(63)
+ sp = (sp - hostarch.Addr(fpSize)) & ^hostarch.Addr(63)
// Construct the UContext64 now since we need its size.
uc := &UContext64{
@@ -180,8 +180,8 @@ func (c *context64) SignalSetup(st *Stack, act *SignalAct, info *SignalInfo, alt
ucSize := uc.SizeBytes()
// st.Arch.Width() is for the restorer address. sizeof(siginfo) == 128.
frameSize := int(st.Arch.Width()) + ucSize + 128
- frameBottom := (sp-usermem.Addr(frameSize)) & ^usermem.Addr(15) - 8
- sp = frameBottom + usermem.Addr(frameSize)
+ frameBottom := (sp-hostarch.Addr(frameSize)) & ^hostarch.Addr(15) - 8
+ sp = frameBottom + hostarch.Addr(frameSize)
st.Bottom = sp
// Prior to proceeding, figure out if the frame will exhaust the range
diff --git a/pkg/sentry/arch/signal_arm64.go b/pkg/sentry/arch/signal_arm64.go
index 53281dcba..da71fb873 100644
--- a/pkg/sentry/arch/signal_arm64.go
+++ b/pkg/sentry/arch/signal_arm64.go
@@ -19,9 +19,9 @@ package arch
import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/arch/fpu"
- "gvisor.dev/gvisor/pkg/usermem"
)
// SignalContext64 is equivalent to struct sigcontext, the type passed as the
@@ -107,8 +107,8 @@ func (c *context64) SignalSetup(st *Stack, act *SignalAct, info *SignalInfo, alt
// sizeof(siginfo) == 128.
// R30 stores the restorer address.
frameSize := ucSize + 128
- frameBottom := (sp - usermem.Addr(frameSize)) & ^usermem.Addr(15)
- sp = frameBottom + usermem.Addr(frameSize)
+ frameBottom := (sp - hostarch.Addr(frameSize)) & ^hostarch.Addr(15)
+ sp = frameBottom + hostarch.Addr(frameSize)
st.Bottom = sp
// Prior to proceeding, figure out if the frame will exhaust the range
diff --git a/pkg/sentry/arch/signal_stack.go b/pkg/sentry/arch/signal_stack.go
index a1eae98f9..c732c7503 100644
--- a/pkg/sentry/arch/signal_stack.go
+++ b/pkg/sentry/arch/signal_stack.go
@@ -17,8 +17,8 @@
package arch
import (
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/marshal"
- "gvisor.dev/gvisor/pkg/usermem"
)
const (
@@ -36,8 +36,8 @@ func (s SignalStack) IsEnabled() bool {
}
// Top returns the stack's top address.
-func (s SignalStack) Top() usermem.Addr {
- return usermem.Addr(s.Addr + s.Size)
+func (s SignalStack) Top() hostarch.Addr {
+ return hostarch.Addr(s.Addr + s.Size)
}
// SetOnStack marks this signal stack as in use.
@@ -49,8 +49,8 @@ func (s *SignalStack) SetOnStack() {
}
// Contains checks if the stack pointer is within this stack.
-func (s *SignalStack) Contains(sp usermem.Addr) bool {
- return usermem.Addr(s.Addr) < sp && sp <= usermem.Addr(s.Addr+s.Size)
+func (s *SignalStack) Contains(sp hostarch.Addr) bool {
+ return hostarch.Addr(s.Addr) < sp && sp <= hostarch.Addr(s.Addr+s.Size)
}
// NativeSignalStack is a type that is equivalent to stack_t in the guest
diff --git a/pkg/sentry/arch/stack.go b/pkg/sentry/arch/stack.go
index 5f06c751d..65a794c7c 100644
--- a/pkg/sentry/arch/stack.go
+++ b/pkg/sentry/arch/stack.go
@@ -16,18 +16,20 @@ package arch
import (
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/marshal/primitive"
+
"gvisor.dev/gvisor/pkg/usermem"
)
-// Stack is a simple wrapper around a usermem.IO and an address. Stack
+// Stack is a simple wrapper around a usermem.IO and an address. Stack
// implements marshal.CopyContext, and marshallable values can be pushed or
// popped from the stack through the marshal.Marshallable interface.
//
// Stack is not thread-safe.
type Stack struct {
// Our arch info.
- // We use this for automatic Native conversion of usermem.Addrs during
+ // We use this for automatic Native conversion of hostarch.Addrs during
// Push() and Pop().
Arch Context
@@ -35,7 +37,7 @@ type Stack struct {
IO usermem.IO
// Our current stack bottom.
- Bottom usermem.Addr
+ Bottom hostarch.Addr
// Scratch buffer used for marshalling to avoid having to repeatedly
// allocate scratch memory.
@@ -59,20 +61,20 @@ func (s *Stack) CopyScratchBuffer(size int) []byte {
// StackBottomMagic is the special address callers must past to all stack
// marshalling operations to cause the src/dst address to be computed based on
// the current end of the stack.
-const StackBottomMagic = ^usermem.Addr(0) // usermem.Addr(-1)
+const StackBottomMagic = ^hostarch.Addr(0) // hostarch.Addr(-1)
// CopyOutBytes implements marshal.CopyContext.CopyOutBytes. CopyOutBytes
// computes an appropriate address based on the current end of the
// stack. Callers use the sentinel address StackBottomMagic to marshal methods
// to indicate this.
-func (s *Stack) CopyOutBytes(sentinel usermem.Addr, b []byte) (int, error) {
+func (s *Stack) CopyOutBytes(sentinel hostarch.Addr, b []byte) (int, error) {
if sentinel != StackBottomMagic {
panic("Attempted to copy out to stack with absolute address")
}
c := len(b)
- n, err := s.IO.CopyOut(context.Background(), s.Bottom-usermem.Addr(c), b, usermem.IOOpts{})
+ n, err := s.IO.CopyOut(context.Background(), s.Bottom-hostarch.Addr(c), b, usermem.IOOpts{})
if err == nil && n == c {
- s.Bottom -= usermem.Addr(n)
+ s.Bottom -= hostarch.Addr(n)
}
return n, err
}
@@ -81,21 +83,21 @@ func (s *Stack) CopyOutBytes(sentinel usermem.Addr, b []byte) (int, error) {
// an appropriate address based on the current end of the stack. Callers must
// use the sentinel address StackBottomMagic to marshal methods to indicate
// this.
-func (s *Stack) CopyInBytes(sentinel usermem.Addr, b []byte) (int, error) {
+func (s *Stack) CopyInBytes(sentinel hostarch.Addr, b []byte) (int, error) {
if sentinel != StackBottomMagic {
panic("Attempted to copy in from stack with absolute address")
}
n, err := s.IO.CopyIn(context.Background(), s.Bottom, b, usermem.IOOpts{})
if err == nil {
- s.Bottom += usermem.Addr(n)
+ s.Bottom += hostarch.Addr(n)
}
return n, err
}
// Align aligns the stack to the given offset.
func (s *Stack) Align(offset int) {
- if s.Bottom%usermem.Addr(offset) != 0 {
- s.Bottom -= (s.Bottom % usermem.Addr(offset))
+ if s.Bottom%hostarch.Addr(offset) != 0 {
+ s.Bottom -= (s.Bottom % hostarch.Addr(offset))
}
}
@@ -119,16 +121,16 @@ func (s *Stack) PushNullTerminatedByteSlice(bs []byte) (int, error) {
// stack.
type StackLayout struct {
// ArgvStart is the beginning of the argument vector.
- ArgvStart usermem.Addr
+ ArgvStart hostarch.Addr
// ArgvEnd is the end of the argument vector.
- ArgvEnd usermem.Addr
+ ArgvEnd hostarch.Addr
// EnvvStart is the beginning of the environment vector.
- EnvvStart usermem.Addr
+ EnvvStart hostarch.Addr
// EnvvEnd is the end of the environment vector.
- EnvvEnd usermem.Addr
+ EnvvEnd hostarch.Addr
}
// Load pushes the given args, env and aux vector to the stack using the
@@ -148,7 +150,7 @@ func (s *Stack) Load(args []string, env []string, aux Auxv) (StackLayout, error)
// to be in this order. See: https://www.uclibc.org/docs/psABI-x86_64.pdf
// page 29.
l.EnvvEnd = s.Bottom
- envAddrs := make([]usermem.Addr, len(env))
+ envAddrs := make([]hostarch.Addr, len(env))
for i := len(env) - 1; i >= 0; i-- {
if _, err := s.PushNullTerminatedByteSlice([]byte(env[i])); err != nil {
return StackLayout{}, err
@@ -159,7 +161,7 @@ func (s *Stack) Load(args []string, env []string, aux Auxv) (StackLayout, error)
// Push our strings.
l.ArgvEnd = s.Bottom
- argAddrs := make([]usermem.Addr, len(args))
+ argAddrs := make([]hostarch.Addr, len(args))
for i := len(args) - 1; i >= 0; i-- {
if _, err := s.PushNullTerminatedByteSlice([]byte(args[i])); err != nil {
return StackLayout{}, err
@@ -178,7 +180,7 @@ func (s *Stack) Load(args []string, env []string, aux Auxv) (StackLayout, error)
argvSize := s.Arch.Width() * uint(len(args)+1)
envvSize := s.Arch.Width() * uint(len(env)+1)
auxvSize := s.Arch.Width() * 2 * uint(len(aux)+1)
- total := usermem.Addr(argvSize) + usermem.Addr(envvSize) + usermem.Addr(auxvSize) + usermem.Addr(s.Arch.Width())
+ total := hostarch.Addr(argvSize) + hostarch.Addr(envvSize) + hostarch.Addr(auxvSize) + hostarch.Addr(s.Arch.Width())
expectedBottom := s.Bottom - total
if expectedBottom%32 != 0 {
s.Bottom -= expectedBottom % 32
@@ -188,11 +190,11 @@ func (s *Stack) Load(args []string, env []string, aux Auxv) (StackLayout, error)
// NOTE: We need an extra zero here per spec.
// The Push function will automatically terminate
// strings and arrays with a single null value.
- auxv := make([]usermem.Addr, 0, len(aux))
+ auxv := make([]hostarch.Addr, 0, len(aux))
for _, a := range aux {
- auxv = append(auxv, usermem.Addr(a.Key), a.Value)
+ auxv = append(auxv, hostarch.Addr(a.Key), a.Value)
}
- auxv = append(auxv, usermem.Addr(0))
+ auxv = append(auxv, hostarch.Addr(0))
_, err := s.pushAddrSliceAndTerminator(auxv)
if err != nil {
return StackLayout{}, err
diff --git a/pkg/sentry/arch/stack_unsafe.go b/pkg/sentry/arch/stack_unsafe.go
index 0e478e434..f4712d58f 100644
--- a/pkg/sentry/arch/stack_unsafe.go
+++ b/pkg/sentry/arch/stack_unsafe.go
@@ -17,19 +17,19 @@ package arch
import (
"unsafe"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/marshal/primitive"
- "gvisor.dev/gvisor/pkg/usermem"
)
// pushAddrSliceAndTerminator copies a slices of addresses to the stack, and
// also pushes an extra null address element at the end of the slice.
//
// Internally, we unsafely transmute the slice type from the arch-dependent
-// []usermem.Addr type, to a slice of fixed-sized ints so that we can pass it to
+// []hostarch.Addr type, to a slice of fixed-sized ints so that we can pass it to
// go-marshal.
//
// On error, the contents of the stack and the bottom cursor are undefined.
-func (s *Stack) pushAddrSliceAndTerminator(src []usermem.Addr) (int, error) {
+func (s *Stack) pushAddrSliceAndTerminator(src []hostarch.Addr) (int, error) {
// Note: Stack grows upwards, so push the terminator first.
switch s.Arch.Width() {
case 8:
diff --git a/pkg/sentry/devices/memdev/zero.go b/pkg/sentry/devices/memdev/zero.go
index 1929e41cd..49c53452a 100644
--- a/pkg/sentry/devices/memdev/zero.go
+++ b/pkg/sentry/devices/memdev/zero.go
@@ -93,6 +93,7 @@ func (fd *zeroFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) erro
// "/dev/zero (deleted)".
opts.Offset = 0
opts.MappingIdentity = &fd.vfsfd
+ opts.SentryOwnedContent = true
opts.MappingIdentity.IncRef()
return nil
}
diff --git a/pkg/sentry/devices/tundev/BUILD b/pkg/sentry/devices/tundev/BUILD
index 71c59287c..8b38d574d 100644
--- a/pkg/sentry/devices/tundev/BUILD
+++ b/pkg/sentry/devices/tundev/BUILD
@@ -9,6 +9,7 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/hostarch",
"//pkg/sentry/arch",
"//pkg/sentry/fsimpl/devtmpfs",
"//pkg/sentry/inet",
diff --git a/pkg/sentry/devices/tundev/tundev.go b/pkg/sentry/devices/tundev/tundev.go
index c43158aa4..a12eeb8e7 100644
--- a/pkg/sentry/devices/tundev/tundev.go
+++ b/pkg/sentry/devices/tundev/tundev.go
@@ -18,6 +18,7 @@ package tundev
import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs"
"gvisor.dev/gvisor/pkg/sentry/inet"
@@ -89,7 +90,7 @@ func (fd *tunFD) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArg
}
// Validate flags.
- flags, err := netstack.LinuxToTUNFlags(usermem.ByteOrder.Uint16(req.Data[:]))
+ flags, err := netstack.LinuxToTUNFlags(hostarch.ByteOrder.Uint16(req.Data[:]))
if err != nil {
return 0, err
}
@@ -98,7 +99,7 @@ func (fd *tunFD) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArg
case linux.TUNGETIFF:
var req linux.IFReq
copy(req.IFName[:], fd.device.Name())
- usermem.ByteOrder.PutUint16(req.Data[:], netstack.TUNFlagsToLinux(fd.device.Flags()))
+ hostarch.ByteOrder.PutUint16(req.Data[:], netstack.TUNFlagsToLinux(fd.device.Flags()))
_, err := req.CopyOut(t, data)
return 0, err
diff --git a/pkg/sentry/fs/BUILD b/pkg/sentry/fs/BUILD
index 420fbae34..0dc100f9b 100644
--- a/pkg/sentry/fs/BUILD
+++ b/pkg/sentry/fs/BUILD
@@ -48,6 +48,7 @@ go_library(
"//pkg/abi/linux",
"//pkg/amutex",
"//pkg/context",
+ "//pkg/hostarch",
"//pkg/log",
"//pkg/p9",
"//pkg/refs",
diff --git a/pkg/sentry/fs/anon/BUILD b/pkg/sentry/fs/anon/BUILD
index aedcecfa1..1ce56d79f 100644
--- a/pkg/sentry/fs/anon/BUILD
+++ b/pkg/sentry/fs/anon/BUILD
@@ -12,9 +12,9 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/hostarch",
"//pkg/sentry/device",
"//pkg/sentry/fs",
"//pkg/sentry/fs/fsutil",
- "//pkg/usermem",
],
)
diff --git a/pkg/sentry/fs/anon/anon.go b/pkg/sentry/fs/anon/anon.go
index 5c421f5fb..8bda22a8e 100644
--- a/pkg/sentry/fs/anon/anon.go
+++ b/pkg/sentry/fs/anon/anon.go
@@ -19,9 +19,9 @@ package anon
import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.dev/gvisor/pkg/usermem"
)
// NewInode constructs an anonymous Inode that is not associated
@@ -37,6 +37,6 @@ func NewInode(ctx context.Context) *fs.Inode {
Type: fs.Anonymous,
DeviceID: PseudoDevice.DeviceID(),
InodeID: PseudoDevice.NextIno(),
- BlockSize: usermem.PageSize,
+ BlockSize: hostarch.PageSize,
})
}
diff --git a/pkg/sentry/fs/copy_up.go b/pkg/sentry/fs/copy_up.go
index 58deb25fc..5aa668873 100644
--- a/pkg/sentry/fs/copy_up.go
+++ b/pkg/sentry/fs/copy_up.go
@@ -20,6 +20,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sync"
@@ -339,7 +340,7 @@ func cleanupUpper(ctx context.Context, parent *Inode, name string, copyUpErr err
// size is the same used by io.Copy.
var copyUpBuffers = sync.Pool{
New: func() interface{} {
- b := make([]byte, 8*usermem.PageSize)
+ b := make([]byte, 8*hostarch.PageSize)
return &b
},
}
diff --git a/pkg/sentry/fs/dev/BUILD b/pkg/sentry/fs/dev/BUILD
index 9379a4d7b..23a3a9a2d 100644
--- a/pkg/sentry/fs/dev/BUILD
+++ b/pkg/sentry/fs/dev/BUILD
@@ -18,6 +18,7 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/hostarch",
"//pkg/rand",
"//pkg/safemem",
"//pkg/sentry/arch",
diff --git a/pkg/sentry/fs/dev/dev.go b/pkg/sentry/fs/dev/dev.go
index acbd401a0..e84ba7a5d 100644
--- a/pkg/sentry/fs/dev/dev.go
+++ b/pkg/sentry/fs/dev/dev.go
@@ -19,6 +19,7 @@ import (
"math"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/fs/ramfs"
"gvisor.dev/gvisor/pkg/sentry/fs/tmpfs"
@@ -49,7 +50,7 @@ func newCharacterDevice(ctx context.Context, iops fs.InodeOperations, msrc *fs.M
return fs.NewInode(ctx, iops, msrc, fs.StableAttr{
DeviceID: devDevice.DeviceID(),
InodeID: devDevice.NextIno(),
- BlockSize: usermem.PageSize,
+ BlockSize: hostarch.PageSize,
Type: fs.CharacterDevice,
DeviceFileMajor: major,
DeviceFileMinor: minor,
@@ -60,7 +61,7 @@ func newMemDevice(ctx context.Context, iops fs.InodeOperations, msrc *fs.MountSo
return fs.NewInode(ctx, iops, msrc, fs.StableAttr{
DeviceID: devDevice.DeviceID(),
InodeID: devDevice.NextIno(),
- BlockSize: usermem.PageSize,
+ BlockSize: hostarch.PageSize,
Type: fs.CharacterDevice,
DeviceFileMajor: memDevMajor,
DeviceFileMinor: minor,
@@ -72,7 +73,7 @@ func newDirectory(ctx context.Context, contents map[string]*fs.Inode, msrc *fs.M
return fs.NewInode(ctx, iops, msrc, fs.StableAttr{
DeviceID: devDevice.DeviceID(),
InodeID: devDevice.NextIno(),
- BlockSize: usermem.PageSize,
+ BlockSize: hostarch.PageSize,
Type: fs.Directory,
})
}
@@ -82,7 +83,7 @@ func newSymlink(ctx context.Context, target string, msrc *fs.MountSource) *fs.In
return fs.NewInode(ctx, iops, msrc, fs.StableAttr{
DeviceID: devDevice.DeviceID(),
InodeID: devDevice.NextIno(),
- BlockSize: usermem.PageSize,
+ BlockSize: hostarch.PageSize,
Type: fs.Symlink,
})
}
@@ -137,7 +138,7 @@ func New(ctx context.Context, msrc *fs.MountSource) *fs.Inode {
return fs.NewInode(ctx, iops, msrc, fs.StableAttr{
DeviceID: devDevice.DeviceID(),
InodeID: devDevice.NextIno(),
- BlockSize: usermem.PageSize,
+ BlockSize: hostarch.PageSize,
Type: fs.Directory,
})
}
diff --git a/pkg/sentry/fs/dev/net_tun.go b/pkg/sentry/fs/dev/net_tun.go
index 11a2984d8..77e8d222a 100644
--- a/pkg/sentry/fs/dev/net_tun.go
+++ b/pkg/sentry/fs/dev/net_tun.go
@@ -17,6 +17,7 @@ package dev
import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
@@ -110,7 +111,7 @@ func (n *netTunFileOperations) Ioctl(ctx context.Context, file *fs.File, io user
}
// Validate flags.
- flags, err := netstack.LinuxToTUNFlags(usermem.ByteOrder.Uint16(req.Data[:]))
+ flags, err := netstack.LinuxToTUNFlags(hostarch.ByteOrder.Uint16(req.Data[:]))
if err != nil {
return 0, err
}
@@ -119,7 +120,7 @@ func (n *netTunFileOperations) Ioctl(ctx context.Context, file *fs.File, io user
case linux.TUNGETIFF:
var req linux.IFReq
copy(req.IFName[:], n.device.Name())
- usermem.ByteOrder.PutUint16(req.Data[:], netstack.TUNFlagsToLinux(n.device.Flags()))
+ hostarch.ByteOrder.PutUint16(req.Data[:], netstack.TUNFlagsToLinux(n.device.Flags()))
_, err := req.CopyOut(t, data)
return 0, err
diff --git a/pkg/sentry/fs/fdpipe/BUILD b/pkg/sentry/fs/fdpipe/BUILD
index c83baf464..2120f2bad 100644
--- a/pkg/sentry/fs/fdpipe/BUILD
+++ b/pkg/sentry/fs/fdpipe/BUILD
@@ -40,6 +40,7 @@ go_test(
"//pkg/context",
"//pkg/fd",
"//pkg/fdnotifier",
+ "//pkg/hostarch",
"//pkg/sentry/contexttest",
"//pkg/sentry/fs",
"//pkg/syserror",
diff --git a/pkg/sentry/fs/fdpipe/pipe_test.go b/pkg/sentry/fs/fdpipe/pipe_test.go
index faeb3908c..ab0e9dac7 100644
--- a/pkg/sentry/fs/fdpipe/pipe_test.go
+++ b/pkg/sentry/fs/fdpipe/pipe_test.go
@@ -27,6 +27,8 @@ import (
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
+
+ "gvisor.dev/gvisor/pkg/hostarch"
)
func singlePipeFD() (int, error) {
@@ -52,7 +54,7 @@ func mockPipeDirent(t *testing.T) *fs.Dirent {
}
inode := fs.NewInode(ctx, node, fs.NewMockMountSource(nil), fs.StableAttr{
Type: fs.Pipe,
- BlockSize: usermem.PageSize,
+ BlockSize: hostarch.PageSize,
})
return fs.NewDirent(ctx, inode, "")
}
diff --git a/pkg/sentry/fs/fsutil/BUILD b/pkg/sentry/fs/fsutil/BUILD
index d388f0e92..6469cc3a9 100644
--- a/pkg/sentry/fs/fsutil/BUILD
+++ b/pkg/sentry/fs/fsutil/BUILD
@@ -76,6 +76,7 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/hostarch",
"//pkg/log",
"//pkg/safemem",
"//pkg/sentry/arch",
@@ -105,6 +106,7 @@ go_test(
library = ":fsutil",
deps = [
"//pkg/context",
+ "//pkg/hostarch",
"//pkg/safemem",
"//pkg/sentry/contexttest",
"//pkg/sentry/fs",
diff --git a/pkg/sentry/fs/fsutil/dirty_set.go b/pkg/sentry/fs/fsutil/dirty_set.go
index 2c9446c1d..38383e730 100644
--- a/pkg/sentry/fs/fsutil/dirty_set.go
+++ b/pkg/sentry/fs/fsutil/dirty_set.go
@@ -18,9 +18,9 @@ import (
"math"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/memmap"
- "gvisor.dev/gvisor/pkg/usermem"
)
// DirtySet maps offsets into a memmap.Mappable to DirtyInfo. It is used to
@@ -215,7 +215,7 @@ func syncDirtyRange(ctx context.Context, mr memmap.MappableRange, cache *FileRan
if max < wbr.Start {
break
}
- ims, err := mem.MapInternal(cseg.FileRangeOf(wbr), usermem.Read)
+ ims, err := mem.MapInternal(cseg.FileRangeOf(wbr), hostarch.Read)
if err != nil {
return err
}
diff --git a/pkg/sentry/fs/fsutil/dirty_set_test.go b/pkg/sentry/fs/fsutil/dirty_set_test.go
index e3579c23c..48448c97c 100644
--- a/pkg/sentry/fs/fsutil/dirty_set_test.go
+++ b/pkg/sentry/fs/fsutil/dirty_set_test.go
@@ -18,18 +18,18 @@ import (
"reflect"
"testing"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/memmap"
- "gvisor.dev/gvisor/pkg/usermem"
)
func TestDirtySet(t *testing.T) {
var set DirtySet
- set.MarkDirty(memmap.MappableRange{0, 2 * usermem.PageSize})
- set.KeepDirty(memmap.MappableRange{usermem.PageSize, 2 * usermem.PageSize})
- set.MarkClean(memmap.MappableRange{0, 2 * usermem.PageSize})
+ set.MarkDirty(memmap.MappableRange{0, 2 * hostarch.PageSize})
+ set.KeepDirty(memmap.MappableRange{hostarch.PageSize, 2 * hostarch.PageSize})
+ set.MarkClean(memmap.MappableRange{0, 2 * hostarch.PageSize})
want := &DirtySegmentDataSlices{
- Start: []uint64{usermem.PageSize},
- End: []uint64{2 * usermem.PageSize},
+ Start: []uint64{hostarch.PageSize},
+ End: []uint64{2 * hostarch.PageSize},
Values: []DirtyInfo{{Keep: true}},
}
if got := set.ExportSortedSlices(); !reflect.DeepEqual(got, want) {
diff --git a/pkg/sentry/fs/fsutil/file_range_set.go b/pkg/sentry/fs/fsutil/file_range_set.go
index 1dc409d38..fdaceb1db 100644
--- a/pkg/sentry/fs/fsutil/file_range_set.go
+++ b/pkg/sentry/fs/fsutil/file_range_set.go
@@ -20,11 +20,11 @@ import (
"math"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/pgalloc"
"gvisor.dev/gvisor/pkg/sentry/usage"
- "gvisor.dev/gvisor/pkg/usermem"
)
// FileRangeSet maps offsets into a memmap.Mappable to offsets into a
@@ -130,7 +130,7 @@ func (frs *FileRangeSet) Fill(ctx context.Context, required, optional memmap.Map
// MemoryFile.AllocateAndFill truncates down to a page
// boundary, but FileRangeSet.Fill is supposed to
// zero-fill to the end of the page in this case.
- donepgaddr, ok := usermem.Addr(done).RoundUp()
+ donepgaddr, ok := hostarch.Addr(done).RoundUp()
if donepg := uint64(donepgaddr); ok && donepg != done {
dsts.DropFirst64(donepg - done)
done = donepg
@@ -184,7 +184,7 @@ func (frs *FileRangeSet) DropAll(mf *pgalloc.MemoryFile) {
// bytes after the new EOF on the same page are zeroed, and pages after the new
// EOF are freed.
func (frs *FileRangeSet) Truncate(end uint64, mf *pgalloc.MemoryFile) {
- pgendaddr, ok := usermem.Addr(end).RoundUp()
+ pgendaddr, ok := hostarch.Addr(end).RoundUp()
if ok {
pgend := uint64(pgendaddr)
@@ -208,7 +208,7 @@ func (frs *FileRangeSet) Truncate(end uint64, mf *pgalloc.MemoryFile) {
if seg.Ok() {
fr := seg.FileRange()
fr.Start += end - seg.Start()
- ims, err := mf.MapInternal(fr, usermem.Write)
+ ims, err := mf.MapInternal(fr, hostarch.Write)
if err != nil {
// There's no good recourse from here. This means
// that we can't keep cached memory consistent with
diff --git a/pkg/sentry/fs/fsutil/host_file_mapper.go b/pkg/sentry/fs/fsutil/host_file_mapper.go
index 54f7b7cdc..23528bf25 100644
--- a/pkg/sentry/fs/fsutil/host_file_mapper.go
+++ b/pkg/sentry/fs/fsutil/host_file_mapper.go
@@ -18,11 +18,11 @@ import (
"fmt"
"golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/usermem"
)
// HostFileMapper caches mappings of an arbitrary host file descriptor. It is
@@ -50,13 +50,13 @@ type HostFileMapper struct {
}
const (
- chunkShift = usermem.HugePageShift
+ chunkShift = hostarch.HugePageShift
chunkSize = 1 << chunkShift
chunkMask = chunkSize - 1
)
func pagesInChunk(mr memmap.MappableRange, chunkStart uint64) int32 {
- return int32(mr.Intersect(memmap.MappableRange{chunkStart, chunkStart + chunkSize}).Length() / usermem.PageSize)
+ return int32(mr.Intersect(memmap.MappableRange{chunkStart, chunkStart + chunkSize}).Length() / hostarch.PageSize)
}
type mapping struct {
diff --git a/pkg/sentry/fs/fsutil/host_mappable.go b/pkg/sentry/fs/fsutil/host_mappable.go
index c15d8a946..e1e38b498 100644
--- a/pkg/sentry/fs/fsutil/host_mappable.go
+++ b/pkg/sentry/fs/fsutil/host_mappable.go
@@ -18,6 +18,7 @@ import (
"math"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/memmap"
@@ -59,7 +60,7 @@ func NewHostMappable(backingFile CachedFileObject) *HostMappable {
}
// AddMapping implements memmap.Mappable.AddMapping.
-func (h *HostMappable) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) error {
+func (h *HostMappable) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error {
// Hot path. Avoid defers.
h.mu.Lock()
mapped := h.mappings.AddMapping(ms, ar, offset, writable)
@@ -71,7 +72,7 @@ func (h *HostMappable) AddMapping(ctx context.Context, ms memmap.MappingSpace, a
}
// RemoveMapping implements memmap.Mappable.RemoveMapping.
-func (h *HostMappable) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) {
+func (h *HostMappable) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) {
// Hot path. Avoid defers.
h.mu.Lock()
unmapped := h.mappings.RemoveMapping(ms, ar, offset, writable)
@@ -82,18 +83,18 @@ func (h *HostMappable) RemoveMapping(ctx context.Context, ms memmap.MappingSpace
}
// CopyMapping implements memmap.Mappable.CopyMapping.
-func (h *HostMappable) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR usermem.AddrRange, offset uint64, writable bool) error {
+func (h *HostMappable) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error {
return h.AddMapping(ctx, ms, dstAR, offset, writable)
}
// Translate implements memmap.Mappable.Translate.
-func (h *HostMappable) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) {
+func (h *HostMappable) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) {
return []memmap.Translation{
{
Source: optional,
File: h,
Offset: optional.Start,
- Perms: usermem.AnyAccess,
+ Perms: hostarch.AnyAccess,
},
}, nil
}
@@ -124,7 +125,7 @@ func (h *HostMappable) NotifyChangeFD() error {
}
// MapInternal implements memmap.File.MapInternal.
-func (h *HostMappable) MapInternal(fr memmap.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) {
+func (h *HostMappable) MapInternal(fr memmap.FileRange, at hostarch.AccessType) (safemem.BlockSeq, error) {
return h.hostFileMapper.MapInternal(fr, h.backingFile.FD(), at.Write)
}
diff --git a/pkg/sentry/fs/fsutil/inode_cached.go b/pkg/sentry/fs/fsutil/inode_cached.go
index 0ed7aafa5..7856b354b 100644
--- a/pkg/sentry/fs/fsutil/inode_cached.go
+++ b/pkg/sentry/fs/fsutil/inode_cached.go
@@ -19,6 +19,7 @@ import (
"io"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/fs"
@@ -622,7 +623,7 @@ func (rw *inodeReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) {
switch {
case seg.Ok():
// Get internal mappings from the cache.
- ims, err := mem.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), usermem.Read)
+ ims, err := mem.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), hostarch.Read)
if err != nil {
unlock()
return done, err
@@ -647,7 +648,7 @@ func (rw *inodeReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) {
// Read into the cache, then re-enter the loop to read from the
// cache.
reqMR := memmap.MappableRange{
- Start: uint64(usermem.Addr(gapMR.Start).RoundDown()),
+ Start: uint64(hostarch.Addr(gapMR.Start).RoundDown()),
End: fs.OffsetPageEnd(int64(gapMR.End)),
}
optMR := gap.Range()
@@ -729,7 +730,7 @@ func (rw *inodeReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error
case seg.Ok() && seg.Start() < mr.End:
// Get internal mappings from the cache.
segMR := seg.Range().Intersect(mr)
- ims, err := mf.MapInternal(seg.FileRangeOf(segMR), usermem.Write)
+ ims, err := mf.MapInternal(seg.FileRangeOf(segMR), hostarch.Write)
if err != nil {
rw.maybeGrowFile()
rw.c.dataMu.Unlock()
@@ -786,7 +787,7 @@ func (c *CachingInodeOperations) useHostPageCache() bool {
}
// AddMapping implements memmap.Mappable.AddMapping.
-func (c *CachingInodeOperations) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) error {
+func (c *CachingInodeOperations) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error {
// Hot path. Avoid defers.
c.mapsMu.Lock()
mapped := c.mappings.AddMapping(ms, ar, offset, writable)
@@ -808,7 +809,7 @@ func (c *CachingInodeOperations) AddMapping(ctx context.Context, ms memmap.Mappi
}
// RemoveMapping implements memmap.Mappable.RemoveMapping.
-func (c *CachingInodeOperations) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) {
+func (c *CachingInodeOperations) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) {
// Hot path. Avoid defers.
c.mapsMu.Lock()
unmapped := c.mappings.RemoveMapping(ms, ar, offset, writable)
@@ -836,12 +837,12 @@ func (c *CachingInodeOperations) RemoveMapping(ctx context.Context, ms memmap.Ma
}
// CopyMapping implements memmap.Mappable.CopyMapping.
-func (c *CachingInodeOperations) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR usermem.AddrRange, offset uint64, writable bool) error {
+func (c *CachingInodeOperations) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error {
return c.AddMapping(ctx, ms, dstAR, offset, writable)
}
// Translate implements memmap.Mappable.Translate.
-func (c *CachingInodeOperations) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) {
+func (c *CachingInodeOperations) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) {
// Hot path. Avoid defer.
if c.useHostPageCache() {
mr := optional
@@ -853,7 +854,7 @@ func (c *CachingInodeOperations) Translate(ctx context.Context, required, option
Source: mr,
File: c,
Offset: mr.Start,
- Perms: usermem.AnyAccess,
+ Perms: hostarch.AnyAccess,
},
}, nil
}
@@ -885,7 +886,7 @@ func (c *CachingInodeOperations) Translate(ctx context.Context, required, option
segMR := seg.Range().Intersect(optional)
// TODO(jamieliu): Make Translations writable even if writability is
// not required if already kept-dirty by another writable translation.
- perms := usermem.AccessType{
+ perms := hostarch.AccessType{
Read: true,
Execute: true,
}
@@ -1050,7 +1051,7 @@ func (c *CachingInodeOperations) DecRef(fr memmap.FileRange) {
// MapInternal implements memmap.File.MapInternal. This is used when we
// directly map an underlying host fd and CachingInodeOperations is used as the
// memmap.File during translation.
-func (c *CachingInodeOperations) MapInternal(fr memmap.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) {
+func (c *CachingInodeOperations) MapInternal(fr memmap.FileRange, at hostarch.AccessType) (safemem.BlockSeq, error) {
return c.hostFileMapper.MapInternal(fr, c.backingFile.FD(), at.Write)
}
diff --git a/pkg/sentry/fs/fsutil/inode_cached_test.go b/pkg/sentry/fs/fsutil/inode_cached_test.go
index 1547584c5..e107c3096 100644
--- a/pkg/sentry/fs/fsutil/inode_cached_test.go
+++ b/pkg/sentry/fs/fsutil/inode_cached_test.go
@@ -20,6 +20,7 @@ import (
"testing"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/contexttest"
"gvisor.dev/gvisor/pkg/sentry/fs"
@@ -249,7 +250,7 @@ func (f *sliceBackingFile) Allocate(ctx context.Context, offset int64, length in
type noopMappingSpace struct{}
// Invalidate implements memmap.MappingSpace.Invalidate.
-func (noopMappingSpace) Invalidate(ar usermem.AddrRange, opts memmap.InvalidateOpts) {
+func (noopMappingSpace) Invalidate(ar hostarch.AddrRange, opts memmap.InvalidateOpts) {
}
func anonInode(ctx context.Context) *fs.Inode {
@@ -259,14 +260,14 @@ func anonInode(ctx context.Context) *fs.Inode {
}, 0),
}, fs.NewPseudoMountSource(ctx), fs.StableAttr{
Type: fs.Anonymous,
- BlockSize: usermem.PageSize,
+ BlockSize: hostarch.PageSize,
})
}
func pagesOf(bs ...byte) []byte {
- buf := make([]byte, 0, len(bs)*usermem.PageSize)
+ buf := make([]byte, 0, len(bs)*hostarch.PageSize)
for _, b := range bs {
- buf = append(buf, bytes.Repeat([]byte{b}, usermem.PageSize)...)
+ buf = append(buf, bytes.Repeat([]byte{b}, hostarch.PageSize)...)
}
return buf
}
@@ -292,28 +293,28 @@ func TestRead(t *testing.T) {
// expects to only cache mapped pages), then call Translate to force it to
// be cached.
var ms noopMappingSpace
- ar := usermem.AddrRange{usermem.PageSize, 2 * usermem.PageSize}
- if err := iops.AddMapping(ctx, ms, ar, usermem.PageSize, true); err != nil {
+ ar := hostarch.AddrRange{hostarch.PageSize, 2 * hostarch.PageSize}
+ if err := iops.AddMapping(ctx, ms, ar, hostarch.PageSize, true); err != nil {
t.Fatalf("AddMapping got %v, want nil", err)
}
- mr := memmap.MappableRange{usermem.PageSize, 2 * usermem.PageSize}
- if _, err := iops.Translate(ctx, mr, mr, usermem.Read); err != nil {
+ mr := memmap.MappableRange{hostarch.PageSize, 2 * hostarch.PageSize}
+ if _, err := iops.Translate(ctx, mr, mr, hostarch.Read); err != nil {
t.Fatalf("Translate got %v, want nil", err)
}
- if cached := iops.cache.Span(); cached != usermem.PageSize {
- t.Errorf("SpanRange got %d, want %d", cached, usermem.PageSize)
+ if cached := iops.cache.Span(); cached != hostarch.PageSize {
+ t.Errorf("SpanRange got %d, want %d", cached, hostarch.PageSize)
}
// Try to read 4 pages. The first and third pages should be read directly
// from the "file", the second page should be read from the cache, and only
// 3 pages (the size of the file) should be readable.
- rbuf := make([]byte, 4*usermem.PageSize)
+ rbuf := make([]byte, 4*hostarch.PageSize)
dst := usermem.BytesIOSequence(rbuf)
n, err := iops.Read(ctx, file, dst, 0)
- if n != 3*usermem.PageSize || (err != nil && err != io.EOF) {
- t.Fatalf("Read got (%d, %v), want (%d, nil or EOF)", n, err, 3*usermem.PageSize)
+ if n != 3*hostarch.PageSize || (err != nil && err != io.EOF) {
+ t.Fatalf("Read got (%d, %v), want (%d, nil or EOF)", n, err, 3*hostarch.PageSize)
}
- rbuf = rbuf[:3*usermem.PageSize]
+ rbuf = rbuf[:3*hostarch.PageSize]
// Did we get the bytes we expect?
if !bytes.Equal(rbuf, buf) {
@@ -323,7 +324,7 @@ func TestRead(t *testing.T) {
// Delete the memory mapping before iops.Release(). The cached page will
// either be evicted by ctx's pgalloc.MemoryFile, or dropped by
// iops.Release().
- iops.RemoveMapping(ctx, ms, ar, usermem.PageSize, true)
+ iops.RemoveMapping(ctx, ms, ar, hostarch.PageSize, true)
}
func TestWrite(t *testing.T) {
@@ -348,25 +349,25 @@ func TestWrite(t *testing.T) {
// CachingInodeOperations expects to only cache mapped pages), then call
// Translate to force them to be cached.
var ms noopMappingSpace
- ar := usermem.AddrRange{usermem.PageSize, 3 * usermem.PageSize}
- if err := iops.AddMapping(ctx, ms, ar, usermem.PageSize, true); err != nil {
+ ar := hostarch.AddrRange{hostarch.PageSize, 3 * hostarch.PageSize}
+ if err := iops.AddMapping(ctx, ms, ar, hostarch.PageSize, true); err != nil {
t.Fatalf("AddMapping got %v, want nil", err)
}
- defer iops.RemoveMapping(ctx, ms, ar, usermem.PageSize, true)
- mr := memmap.MappableRange{usermem.PageSize, 3 * usermem.PageSize}
- if _, err := iops.Translate(ctx, mr, mr, usermem.Read); err != nil {
+ defer iops.RemoveMapping(ctx, ms, ar, hostarch.PageSize, true)
+ mr := memmap.MappableRange{hostarch.PageSize, 3 * hostarch.PageSize}
+ if _, err := iops.Translate(ctx, mr, mr, hostarch.Read); err != nil {
t.Fatalf("Translate got %v, want nil", err)
}
- if cached := iops.cache.Span(); cached != 2*usermem.PageSize {
- t.Errorf("SpanRange got %d, want %d", cached, 2*usermem.PageSize)
+ if cached := iops.cache.Span(); cached != 2*hostarch.PageSize {
+ t.Errorf("SpanRange got %d, want %d", cached, 2*hostarch.PageSize)
}
// Write to the first 2 pages.
wbuf := pagesOf('e', 'f')
src := usermem.BytesIOSequence(wbuf)
n, err := iops.Write(ctx, src, 0)
- if n != 2*usermem.PageSize || err != nil {
- t.Fatalf("Write got (%d, %v), want (%d, nil)", n, err, 2*usermem.PageSize)
+ if n != 2*hostarch.PageSize || err != nil {
+ t.Fatalf("Write got (%d, %v), want (%d, nil)", n, err, 2*hostarch.PageSize)
}
// The first page should have been written directly, since it was not cached.
@@ -382,7 +383,7 @@ func TestWrite(t *testing.T) {
}
// Now the second page should have been written as well.
- copy(want[usermem.PageSize:], pagesOf('f'))
+ copy(want[hostarch.PageSize:], pagesOf('f'))
if !bytes.Equal(buf, want) {
t.Errorf("File contents are %v, want %v", buf, want)
}
diff --git a/pkg/sentry/fs/gofer/BUILD b/pkg/sentry/fs/gofer/BUILD
index b210e0e7e..c4a069832 100644
--- a/pkg/sentry/fs/gofer/BUILD
+++ b/pkg/sentry/fs/gofer/BUILD
@@ -27,6 +27,7 @@ go_library(
"//pkg/abi/linux",
"//pkg/context",
"//pkg/fd",
+ "//pkg/hostarch",
"//pkg/log",
"//pkg/p9",
"//pkg/refs",
diff --git a/pkg/sentry/fs/gofer/attr.go b/pkg/sentry/fs/gofer/attr.go
index cffc756cc..d6bff3f40 100644
--- a/pkg/sentry/fs/gofer/attr.go
+++ b/pkg/sentry/fs/gofer/attr.go
@@ -17,11 +17,11 @@ package gofer
import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/p9"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
- "gvisor.dev/gvisor/pkg/usermem"
)
// getattr returns the 9p attributes of the p9.File. On success, Mode, Size, and RDev
@@ -98,7 +98,7 @@ func bsize(pattr p9.Attr) int64 {
// Some files, particularly those that are not on a local file system,
// may have no clue of their block size. Better not to report something
// misleading or buggy and have a safe default.
- return usermem.PageSize
+ return hostarch.PageSize
}
// ntype returns an fs.InodeType from 9p attributes.
diff --git a/pkg/sentry/fs/inotify.go b/pkg/sentry/fs/inotify.go
index fb81d903d..1b83643db 100644
--- a/pkg/sentry/fs/inotify.go
+++ b/pkg/sentry/fs/inotify.go
@@ -20,6 +20,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/uniqueid"
@@ -216,7 +217,7 @@ func (i *Inotify) Ioctl(ctx context.Context, _ *File, io usermem.IO, args arch.S
n += uint32(e.sizeOf())
}
var buf [4]byte
- usermem.ByteOrder.PutUint32(buf[:], n)
+ hostarch.ByteOrder.PutUint32(buf[:], n)
_, err := io.CopyOut(ctx, args[2].Pointer(), buf[:], usermem.IOOpts{})
return 0, err
diff --git a/pkg/sentry/fs/inotify_event.go b/pkg/sentry/fs/inotify_event.go
index 686e1b1cd..399aff1ed 100644
--- a/pkg/sentry/fs/inotify_event.go
+++ b/pkg/sentry/fs/inotify_event.go
@@ -19,6 +19,7 @@ import (
"fmt"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -100,10 +101,10 @@ func (e *Event) sizeOf() int {
// construct the output. We use a buffer allocated ahead of time for
// performance. buf must be at least inotifyEventBaseSize bytes.
func (e *Event) CopyTo(ctx context.Context, buf []byte, dst usermem.IOSequence) (int64, error) {
- usermem.ByteOrder.PutUint32(buf[0:], uint32(e.wd))
- usermem.ByteOrder.PutUint32(buf[4:], e.mask)
- usermem.ByteOrder.PutUint32(buf[8:], e.cookie)
- usermem.ByteOrder.PutUint32(buf[12:], e.len)
+ hostarch.ByteOrder.PutUint32(buf[0:], uint32(e.wd))
+ hostarch.ByteOrder.PutUint32(buf[4:], e.mask)
+ hostarch.ByteOrder.PutUint32(buf[8:], e.cookie)
+ hostarch.ByteOrder.PutUint32(buf[12:], e.len)
writeLen := 0
diff --git a/pkg/sentry/fs/offset.go b/pkg/sentry/fs/offset.go
index 53b5df175..3a8c97d8f 100644
--- a/pkg/sentry/fs/offset.go
+++ b/pkg/sentry/fs/offset.go
@@ -17,14 +17,14 @@ package fs
import (
"math"
- "gvisor.dev/gvisor/pkg/usermem"
+ "gvisor.dev/gvisor/pkg/hostarch"
)
// OffsetPageEnd returns the file offset rounded up to the nearest
// page boundary. OffsetPageEnd panics if rounding up causes overflow,
// which shouldn't be possible given that offset is an int64.
func OffsetPageEnd(offset int64) uint64 {
- end, ok := usermem.Addr(offset).RoundUp()
+ end, ok := hostarch.Addr(offset).RoundUp()
if !ok {
panic("impossible overflow")
}
diff --git a/pkg/sentry/fs/overlay.go b/pkg/sentry/fs/overlay.go
index 01a1235b8..f96f5a3e5 100644
--- a/pkg/sentry/fs/overlay.go
+++ b/pkg/sentry/fs/overlay.go
@@ -19,11 +19,11 @@ import (
"strings"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
// The virtual filesystem implements an overlay configuration. For a high-level
@@ -274,7 +274,7 @@ func (o *overlayEntry) markDirectoryDirty() {
}
// AddMapping implements memmap.Mappable.AddMapping.
-func (o *overlayEntry) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) error {
+func (o *overlayEntry) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error {
o.mapsMu.Lock()
defer o.mapsMu.Unlock()
if err := o.inodeLocked().Mappable().AddMapping(ctx, ms, ar, offset, writable); err != nil {
@@ -285,7 +285,7 @@ func (o *overlayEntry) AddMapping(ctx context.Context, ms memmap.MappingSpace, a
}
// RemoveMapping implements memmap.Mappable.RemoveMapping.
-func (o *overlayEntry) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) {
+func (o *overlayEntry) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) {
o.mapsMu.Lock()
defer o.mapsMu.Unlock()
o.inodeLocked().Mappable().RemoveMapping(ctx, ms, ar, offset, writable)
@@ -293,7 +293,7 @@ func (o *overlayEntry) RemoveMapping(ctx context.Context, ms memmap.MappingSpace
}
// CopyMapping implements memmap.Mappable.CopyMapping.
-func (o *overlayEntry) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR usermem.AddrRange, offset uint64, writable bool) error {
+func (o *overlayEntry) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error {
o.mapsMu.Lock()
defer o.mapsMu.Unlock()
if err := o.inodeLocked().Mappable().CopyMapping(ctx, ms, srcAR, dstAR, offset, writable); err != nil {
@@ -304,7 +304,7 @@ func (o *overlayEntry) CopyMapping(ctx context.Context, ms memmap.MappingSpace,
}
// Translate implements memmap.Mappable.Translate.
-func (o *overlayEntry) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) {
+func (o *overlayEntry) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) {
o.dataMu.RLock()
defer o.dataMu.RUnlock()
return o.inodeLocked().Mappable().Translate(ctx, required, optional, at)
diff --git a/pkg/sentry/fs/proc/BUILD b/pkg/sentry/fs/proc/BUILD
index b8b2281a8..7af7e0b45 100644
--- a/pkg/sentry/fs/proc/BUILD
+++ b/pkg/sentry/fs/proc/BUILD
@@ -30,6 +30,7 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/hostarch",
"//pkg/log",
"//pkg/sentry/fs",
"//pkg/sentry/fs/fsutil",
diff --git a/pkg/sentry/fs/proc/exec_args.go b/pkg/sentry/fs/proc/exec_args.go
index e6171dd1d..24426b225 100644
--- a/pkg/sentry/fs/proc/exec_args.go
+++ b/pkg/sentry/fs/proc/exec_args.go
@@ -21,6 +21,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
"gvisor.dev/gvisor/pkg/sentry/kernel"
@@ -113,7 +114,7 @@ func (f *execArgFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequen
defer m.DecUsers(ctx)
// Figure out the bounds of the exec arg we are trying to read.
- var execArgStart, execArgEnd usermem.Addr
+ var execArgStart, execArgEnd hostarch.Addr
switch f.arg {
case cmdlineExecArg:
execArgStart, execArgEnd = m.ArgvStart(), m.ArgvEnd()
@@ -172,8 +173,8 @@ func (f *execArgFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequen
// https://elixir.bootlin.com/linux/v4.20/source/fs/proc/base.c#L208
// we'll return one page total between argv and envp because of the
// above page restrictions.
- if lengthEnvv > usermem.PageSize-len(buf) {
- lengthEnvv = usermem.PageSize - len(buf)
+ if lengthEnvv > hostarch.PageSize-len(buf) {
+ lengthEnvv = hostarch.PageSize - len(buf)
}
// Make a new buffer to fit the whole thing
tmp := make([]byte, length+lengthEnvv)
diff --git a/pkg/sentry/fs/proc/inode.go b/pkg/sentry/fs/proc/inode.go
index d2859a4c2..78132f7a5 100644
--- a/pkg/sentry/fs/proc/inode.go
+++ b/pkg/sentry/fs/proc/inode.go
@@ -17,13 +17,13 @@ package proc
import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
"gvisor.dev/gvisor/pkg/sentry/fs/proc/device"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/mm"
- "gvisor.dev/gvisor/pkg/usermem"
)
// LINT.IfChange
@@ -125,7 +125,7 @@ func newProcInode(ctx context.Context, iops fs.InodeOperations, msrc *fs.MountSo
sattr := fs.StableAttr{
DeviceID: device.ProcDevice.DeviceID(),
InodeID: device.ProcDevice.NextIno(),
- BlockSize: usermem.PageSize,
+ BlockSize: hostarch.PageSize,
Type: typ,
}
if t != nil {
diff --git a/pkg/sentry/fs/proc/meminfo.go b/pkg/sentry/fs/proc/meminfo.go
index 91617267d..7d975d333 100644
--- a/pkg/sentry/fs/proc/meminfo.go
+++ b/pkg/sentry/fs/proc/meminfo.go
@@ -19,10 +19,10 @@ import (
"fmt"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/usage"
- "gvisor.dev/gvisor/pkg/usermem"
)
// LINT.IfChange
@@ -53,7 +53,7 @@ func (d *meminfoData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle)
anon := snapshot.Anonymous + snapshot.Tmpfs
file := snapshot.PageCache + snapshot.Mapped
// We don't actually have active/inactive LRUs, so just make up numbers.
- activeFile := (file / 2) &^ (usermem.PageSize - 1)
+ activeFile := (file / 2) &^ (hostarch.PageSize - 1)
inactiveFile := file - activeFile
var buf bytes.Buffer
diff --git a/pkg/sentry/fs/proc/net.go b/pkg/sentry/fs/proc/net.go
index 203cfa061..91c35eea9 100644
--- a/pkg/sentry/fs/proc/net.go
+++ b/pkg/sentry/fs/proc/net.go
@@ -23,6 +23,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile"
@@ -35,7 +36,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/tcpip/header"
- "gvisor.dev/gvisor/pkg/usermem"
)
// LINT.IfChange
@@ -367,10 +367,10 @@ func (n *netRoute) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]
)
if len(rt.GatewayAddr) == header.IPv4AddressSize {
flags |= linux.RTF_GATEWAY
- gw = usermem.ByteOrder.Uint32(rt.GatewayAddr)
+ gw = hostarch.ByteOrder.Uint32(rt.GatewayAddr)
}
if len(rt.DstAddr) == header.IPv4AddressSize {
- prefix = usermem.ByteOrder.Uint32(rt.DstAddr)
+ prefix = hostarch.ByteOrder.Uint32(rt.DstAddr)
}
l := fmt.Sprintf(
"%s\t%08X\t%08X\t%04X\t%d\t%d\t%d\t%08X\t%d\t%d\t%d",
@@ -520,7 +520,7 @@ func networkToHost16(n uint16) uint16 {
// binary.BigEndian.Uint16() require a read of binary.BigEndian and an
// interface method call, defeating inlining.
buf := [2]byte{byte(n >> 8 & 0xff), byte(n & 0xff)}
- return usermem.ByteOrder.Uint16(buf[:])
+ return hostarch.ByteOrder.Uint16(buf[:])
}
func writeInetAddr(w io.Writer, family int, i linux.SockAddr) {
@@ -542,14 +542,14 @@ func writeInetAddr(w io.Writer, family int, i linux.SockAddr) {
// __be32 which is a typedef for an unsigned int, and is printed with
// %X. This means that for a little-endian machine, Linux prints the
// least-significant byte of the address first. To emulate this, we first
- // invert the byte order for the address using usermem.ByteOrder.Uint32,
+ // invert the byte order for the address using hostarch.ByteOrder.Uint32,
// which makes it have the equivalent encoding to a __be32 on a little
// endian machine. Note that this operation is a no-op on a big endian
// machine. Then similar to Linux, we format it with %X, which will print
// the most-significant byte of the __be32 address first, which is now
// actually the least-significant byte of the original address in
// linux.SockAddrInet.Addr on little endian machines, due to the conversion.
- addr := usermem.ByteOrder.Uint32(a.Addr[:])
+ addr := hostarch.ByteOrder.Uint32(a.Addr[:])
fmt.Fprintf(w, "%08X:%04X ", addr, port)
case linux.AF_INET6:
@@ -559,10 +559,10 @@ func writeInetAddr(w io.Writer, family int, i linux.SockAddr) {
}
port := networkToHost16(a.Port)
- addr0 := usermem.ByteOrder.Uint32(a.Addr[0:4])
- addr1 := usermem.ByteOrder.Uint32(a.Addr[4:8])
- addr2 := usermem.ByteOrder.Uint32(a.Addr[8:12])
- addr3 := usermem.ByteOrder.Uint32(a.Addr[12:16])
+ addr0 := hostarch.ByteOrder.Uint32(a.Addr[0:4])
+ addr1 := hostarch.ByteOrder.Uint32(a.Addr[4:8])
+ addr2 := hostarch.ByteOrder.Uint32(a.Addr[8:12])
+ addr3 := hostarch.ByteOrder.Uint32(a.Addr[12:16])
fmt.Fprintf(w, "%08X%08X%08X%08X:%04X ", addr0, addr1, addr2, addr3, port)
}
}
diff --git a/pkg/sentry/fs/proc/seqfile/BUILD b/pkg/sentry/fs/proc/seqfile/BUILD
index 21338d912..713b81e08 100644
--- a/pkg/sentry/fs/proc/seqfile/BUILD
+++ b/pkg/sentry/fs/proc/seqfile/BUILD
@@ -9,6 +9,7 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/hostarch",
"//pkg/sentry/fs",
"//pkg/sentry/fs/fsutil",
"//pkg/sentry/fs/proc/device",
diff --git a/pkg/sentry/fs/proc/seqfile/seqfile.go b/pkg/sentry/fs/proc/seqfile/seqfile.go
index 6121f0e95..b01688b1d 100644
--- a/pkg/sentry/fs/proc/seqfile/seqfile.go
+++ b/pkg/sentry/fs/proc/seqfile/seqfile.go
@@ -20,6 +20,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
"gvisor.dev/gvisor/pkg/sentry/fs/proc/device"
@@ -131,7 +132,7 @@ func NewSeqFileInode(ctx context.Context, source SeqSource, msrc *fs.MountSource
sattr := fs.StableAttr{
DeviceID: device.ProcDevice.DeviceID(),
InodeID: device.ProcDevice.NextIno(),
- BlockSize: usermem.PageSize,
+ BlockSize: hostarch.PageSize,
Type: fs.SpecialFile,
}
return fs.NewInode(ctx, iops, msrc, sattr)
diff --git a/pkg/sentry/fs/proc/sys_net.go b/pkg/sentry/fs/proc/sys_net.go
index bbe282c03..1d09afdd7 100644
--- a/pkg/sentry/fs/proc/sys_net.go
+++ b/pkg/sentry/fs/proc/sys_net.go
@@ -21,6 +21,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
"gvisor.dev/gvisor/pkg/sentry/fs/proc/device"
@@ -76,7 +77,7 @@ func newTCPMemInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack, dir
sattr := fs.StableAttr{
DeviceID: device.ProcDevice.DeviceID(),
InodeID: device.ProcDevice.NextIno(),
- BlockSize: usermem.PageSize,
+ BlockSize: hostarch.PageSize,
Type: fs.SpecialFile,
}
return fs.NewInode(ctx, tm, msrc, sattr)
@@ -136,7 +137,7 @@ func (f *tcpMemFile) Write(ctx context.Context, _ *fs.File, src usermem.IOSequen
f.tcpMemInode.mu.Lock()
defer f.tcpMemInode.mu.Unlock()
- src = src.TakeFirst(usermem.PageSize - 1)
+ src = src.TakeFirst(hostarch.PageSize - 1)
size, err := readSize(f.tcpMemInode.dir, f.tcpMemInode.s)
if err != nil {
return 0, err
@@ -192,7 +193,7 @@ func newTCPSackInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *f
sattr := fs.StableAttr{
DeviceID: device.ProcDevice.DeviceID(),
InodeID: device.ProcDevice.NextIno(),
- BlockSize: usermem.PageSize,
+ BlockSize: hostarch.PageSize,
Type: fs.SpecialFile,
}
return fs.NewInode(ctx, ts, msrc, sattr)
@@ -264,7 +265,7 @@ func (f *tcpSackFile) Write(ctx context.Context, _ *fs.File, src usermem.IOSeque
// Only consider size of one memory page for input for performance reasons.
// We are only reading if it's zero or not anyway.
- src = src.TakeFirst(usermem.PageSize - 1)
+ src = src.TakeFirst(hostarch.PageSize - 1)
var v int32
n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts)
@@ -294,7 +295,7 @@ func newTCPRecoveryInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack
sattr := fs.StableAttr{
DeviceID: device.ProcDevice.DeviceID(),
InodeID: device.ProcDevice.NextIno(),
- BlockSize: usermem.PageSize,
+ BlockSize: hostarch.PageSize,
Type: fs.SpecialFile,
}
return fs.NewInode(ctx, ts, msrc, sattr)
@@ -354,7 +355,7 @@ func (f *tcpRecoveryFile) Write(ctx context.Context, _ *fs.File, src usermem.IOS
if src.NumBytes() == 0 {
return 0, nil
}
- src = src.TakeFirst(usermem.PageSize - 1)
+ src = src.TakeFirst(hostarch.PageSize - 1)
var v int32
n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts)
@@ -413,7 +414,7 @@ func newIPForwardingInode(ctx context.Context, msrc *fs.MountSource, s inet.Stac
sattr := fs.StableAttr{
DeviceID: device.ProcDevice.DeviceID(),
InodeID: device.ProcDevice.NextIno(),
- BlockSize: usermem.PageSize,
+ BlockSize: hostarch.PageSize,
Type: fs.SpecialFile,
}
return fs.NewInode(ctx, ipf, msrc, sattr)
@@ -486,7 +487,7 @@ func (f *ipForwardingFile) Write(ctx context.Context, _ *fs.File, src usermem.IO
// Only consider size of one memory page for input for performance reasons.
// We are only reading if it's zero or not anyway.
- src = src.TakeFirst(usermem.PageSize - 1)
+ src = src.TakeFirst(hostarch.PageSize - 1)
var v int32
n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts)
@@ -524,7 +525,7 @@ func newPortRangeInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack)
sattr := fs.StableAttr{
DeviceID: device.ProcDevice.DeviceID(),
InodeID: device.ProcDevice.NextIno(),
- BlockSize: usermem.PageSize,
+ BlockSize: hostarch.PageSize,
Type: fs.SpecialFile,
}
return fs.NewInode(ctx, ipf, msrc, sattr)
@@ -589,7 +590,7 @@ func (pf *portRangeFile) Write(ctx context.Context, _ *fs.File, src usermem.IOSe
// Only consider size of one memory page for input for performance
// reasons.
- src = src.TakeFirst(usermem.PageSize - 1)
+ src = src.TakeFirst(hostarch.PageSize - 1)
ports := make([]int32, 2)
n, err := usermem.CopyInt32StringsInVec(ctx, src.IO, src.Addrs, ports, src.Opts)
diff --git a/pkg/sentry/fs/proc/task.go b/pkg/sentry/fs/proc/task.go
index f43d6c221..ae5ed25f9 100644
--- a/pkg/sentry/fs/proc/task.go
+++ b/pkg/sentry/fs/proc/task.go
@@ -23,6 +23,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
"gvisor.dev/gvisor/pkg/sentry/fs/proc/device"
@@ -469,7 +470,7 @@ func (m *memDataFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequen
defer mm.DecUsers(ctx)
// Buffer the read data because of MM locks
buf := make([]byte, dst.NumBytes())
- n, readErr := mm.CopyIn(ctx, usermem.Addr(offset), buf, usermem.IOOpts{IgnorePermissions: true})
+ n, readErr := mm.CopyIn(ctx, hostarch.Addr(offset), buf, usermem.IOOpts{IgnorePermissions: true})
if n > 0 {
if _, err := dst.CopyOut(ctx, buf[:n]); err != nil {
return 0, syserror.EFAULT
@@ -632,7 +633,7 @@ func (s *taskStatData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle)
rss = mm.ResidentSetSize()
}
})
- fmt.Fprintf(&buf, "%d %d ", vss, rss/usermem.PageSize)
+ fmt.Fprintf(&buf, "%d %d ", vss, rss/hostarch.PageSize)
// rsslim.
fmt.Fprintf(&buf, "%d ", s.t.ThreadGroup().Limits().Get(limits.Rss).Cur)
@@ -684,7 +685,7 @@ func (s *statmData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([
})
var buf bytes.Buffer
- fmt.Fprintf(&buf, "%d %d 0 0 0 0 0\n", vss/usermem.PageSize, rss/usermem.PageSize)
+ fmt.Fprintf(&buf, "%d %d 0 0 0 0 0\n", vss/hostarch.PageSize, rss/hostarch.PageSize)
return []seqfile.SeqData{{Buf: buf.Bytes(), Handle: (*statmData)(nil)}}, 0
}
@@ -939,8 +940,8 @@ func (f *auxvecFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequenc
buf := make([]byte, size)
for i, e := range auxv {
- usermem.ByteOrder.PutUint64(buf[16*i:], e.Key)
- usermem.ByteOrder.PutUint64(buf[16*i+8:], uint64(e.Value))
+ hostarch.ByteOrder.PutUint64(buf[16*i:], e.Key)
+ hostarch.ByteOrder.PutUint64(buf[16*i+8:], uint64(e.Value))
}
n, err := dst.CopyOut(ctx, buf[offset:])
@@ -1020,7 +1021,7 @@ func (f *oomScoreAdjFile) Write(ctx context.Context, _ *fs.File, src usermem.IOS
}
// Limit input size so as not to impact performance if input size is large.
- src = src.TakeFirst(usermem.PageSize - 1)
+ src = src.TakeFirst(hostarch.PageSize - 1)
var v int32
n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts)
diff --git a/pkg/sentry/fs/proc/uid_gid_map.go b/pkg/sentry/fs/proc/uid_gid_map.go
index 2bc9485d8..30d5ad4cf 100644
--- a/pkg/sentry/fs/proc/uid_gid_map.go
+++ b/pkg/sentry/fs/proc/uid_gid_map.go
@@ -21,6 +21,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
"gvisor.dev/gvisor/pkg/sentry/kernel"
@@ -132,7 +133,7 @@ func (imfo *idMapFileOperations) Write(ctx context.Context, file *fs.File, src u
// the system page size, and the write must be performed at the start of
// the file ..." - user_namespaces(7)
srclen := src.NumBytes()
- if srclen >= usermem.PageSize || offset != 0 {
+ if srclen >= hostarch.PageSize || offset != 0 {
return 0, syserror.EINVAL
}
b := make([]byte, srclen)
diff --git a/pkg/sentry/fs/ramfs/BUILD b/pkg/sentry/fs/ramfs/BUILD
index a51d00d86..4a3d9636b 100644
--- a/pkg/sentry/fs/ramfs/BUILD
+++ b/pkg/sentry/fs/ramfs/BUILD
@@ -14,13 +14,13 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/hostarch",
"//pkg/sentry/fs",
"//pkg/sentry/fs/anon",
"//pkg/sentry/fs/fsutil",
"//pkg/sentry/socket/unix/transport",
"//pkg/sync",
"//pkg/syserror",
- "//pkg/usermem",
"//pkg/waiter",
"@org_golang_x_sys//unix:go_default_library",
],
diff --git a/pkg/sentry/fs/ramfs/tree.go b/pkg/sentry/fs/ramfs/tree.go
index dfc9d3453..0ace636c9 100644
--- a/pkg/sentry/fs/ramfs/tree.go
+++ b/pkg/sentry/fs/ramfs/tree.go
@@ -20,9 +20,9 @@ import (
"strings"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/fs/anon"
- "gvisor.dev/gvisor/pkg/usermem"
)
// MakeDirectoryTree constructs a ramfs tree of all directories containing
@@ -71,7 +71,7 @@ func emptyDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode {
return fs.NewInode(ctx, dir, msrc, fs.StableAttr{
DeviceID: anon.PseudoDevice.DeviceID(),
InodeID: anon.PseudoDevice.NextIno(),
- BlockSize: usermem.PageSize,
+ BlockSize: hostarch.PageSize,
Type: fs.Directory,
})
}
diff --git a/pkg/sentry/fs/sys/BUILD b/pkg/sentry/fs/sys/BUILD
index f2e8b9932..fdbc5f912 100644
--- a/pkg/sentry/fs/sys/BUILD
+++ b/pkg/sentry/fs/sys/BUILD
@@ -14,11 +14,11 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/hostarch",
"//pkg/sentry/device",
"//pkg/sentry/fs",
"//pkg/sentry/fs/fsutil",
"//pkg/sentry/fs/ramfs",
"//pkg/sentry/kernel",
- "//pkg/usermem",
],
)
diff --git a/pkg/sentry/fs/sys/sys.go b/pkg/sentry/fs/sys/sys.go
index 0891645e4..101779a7a 100644
--- a/pkg/sentry/fs/sys/sys.go
+++ b/pkg/sentry/fs/sys/sys.go
@@ -17,16 +17,16 @@ package sys
import (
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/fs/ramfs"
- "gvisor.dev/gvisor/pkg/usermem"
)
func newFile(ctx context.Context, node fs.InodeOperations, msrc *fs.MountSource) *fs.Inode {
sattr := fs.StableAttr{
DeviceID: sysfsDevice.DeviceID(),
InodeID: sysfsDevice.NextIno(),
- BlockSize: usermem.PageSize,
+ BlockSize: hostarch.PageSize,
Type: fs.SpecialFile,
}
return fs.NewInode(ctx, node, msrc, sattr)
@@ -37,7 +37,7 @@ func newDir(ctx context.Context, msrc *fs.MountSource, contents map[string]*fs.I
return fs.NewInode(ctx, d, msrc, fs.StableAttr{
DeviceID: sysfsDevice.DeviceID(),
InodeID: sysfsDevice.NextIno(),
- BlockSize: usermem.PageSize,
+ BlockSize: hostarch.PageSize,
Type: fs.SpecialDirectory,
})
}
diff --git a/pkg/sentry/fs/timerfd/BUILD b/pkg/sentry/fs/timerfd/BUILD
index d16cdb4df..c7977a217 100644
--- a/pkg/sentry/fs/timerfd/BUILD
+++ b/pkg/sentry/fs/timerfd/BUILD
@@ -8,6 +8,7 @@ go_library(
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/context",
+ "//pkg/hostarch",
"//pkg/sentry/fs",
"//pkg/sentry/fs/anon",
"//pkg/sentry/fs/fsutil",
diff --git a/pkg/sentry/fs/timerfd/timerfd.go b/pkg/sentry/fs/timerfd/timerfd.go
index 46511a6ac..c8ebe256c 100644
--- a/pkg/sentry/fs/timerfd/timerfd.go
+++ b/pkg/sentry/fs/timerfd/timerfd.go
@@ -20,6 +20,7 @@ import (
"sync/atomic"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/fs/anon"
"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
@@ -124,7 +125,7 @@ func (t *TimerOperations) Read(ctx context.Context, file *fs.File, dst usermem.I
}
if val := atomic.SwapUint64(&t.val, 0); val != 0 {
var buf [sizeofUint64]byte
- usermem.ByteOrder.PutUint64(buf[:], val)
+ hostarch.ByteOrder.PutUint64(buf[:], val)
if _, err := dst.CopyOut(ctx, buf[:]); err != nil {
// Linux does not undo consuming the number of expirations even if
// writing to userspace fails.
diff --git a/pkg/sentry/fs/tmpfs/BUILD b/pkg/sentry/fs/tmpfs/BUILD
index b521a86a2..90398376a 100644
--- a/pkg/sentry/fs/tmpfs/BUILD
+++ b/pkg/sentry/fs/tmpfs/BUILD
@@ -15,6 +15,7 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/hostarch",
"//pkg/safemem",
"//pkg/sentry/device",
"//pkg/sentry/fs",
@@ -42,6 +43,7 @@ go_test(
library = ":tmpfs",
deps = [
"//pkg/context",
+ "//pkg/hostarch",
"//pkg/sentry/fs",
"//pkg/sentry/kernel/contexttest",
"//pkg/sentry/usage",
diff --git a/pkg/sentry/fs/tmpfs/file_test.go b/pkg/sentry/fs/tmpfs/file_test.go
index d4d613ea9..1718f9372 100644
--- a/pkg/sentry/fs/tmpfs/file_test.go
+++ b/pkg/sentry/fs/tmpfs/file_test.go
@@ -19,6 +19,7 @@ import (
"testing"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/kernel/contexttest"
"gvisor.dev/gvisor/pkg/sentry/usage"
@@ -31,7 +32,7 @@ func newFileInode(ctx context.Context) *fs.Inode {
return fs.NewInode(ctx, iops, m, fs.StableAttr{
DeviceID: tmpfsDevice.DeviceID(),
InodeID: tmpfsDevice.NextIno(),
- BlockSize: usermem.PageSize,
+ BlockSize: hostarch.PageSize,
Type: fs.RegularFile,
})
}
diff --git a/pkg/sentry/fs/tmpfs/inode_file.go b/pkg/sentry/fs/tmpfs/inode_file.go
index ad4aea282..f4de8c968 100644
--- a/pkg/sentry/fs/tmpfs/inode_file.go
+++ b/pkg/sentry/fs/tmpfs/inode_file.go
@@ -21,6 +21,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
@@ -125,7 +126,7 @@ func NewMemfdInode(ctx context.Context, allowSeals bool) *fs.Inode {
Type: fs.RegularFile,
DeviceID: tmpfsDevice.DeviceID(),
InodeID: tmpfsDevice.NextIno(),
- BlockSize: usermem.PageSize,
+ BlockSize: hostarch.PageSize,
})
}
@@ -392,7 +393,7 @@ func (rw *fileReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) {
switch {
case seg.Ok():
// Get internal mappings.
- ims, err := mf.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), usermem.Read)
+ ims, err := mf.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), hostarch.Read)
if err != nil {
return done, err
}
@@ -463,7 +464,7 @@ func (rw *fileReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error)
//
// See Linux, mm/filemap.c:generic_perform_write() and
// mm/shmem.c:shmem_write_begin().
- if pgstart := int64(usermem.Addr(rw.f.attr.Size).RoundDown()); end > pgstart {
+ if pgstart := int64(hostarch.Addr(rw.f.attr.Size).RoundDown()); end > pgstart {
end = pgstart
}
if end <= rw.offset {
@@ -483,8 +484,8 @@ func (rw *fileReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error)
mf := rw.f.kernel.MemoryFile()
// Page-aligned mr for when we need to allocate memory. RoundUp can't
// overflow since end is an int64.
- pgstartaddr := usermem.Addr(rw.offset).RoundDown()
- pgendaddr, _ := usermem.Addr(end).RoundUp()
+ pgstartaddr := hostarch.Addr(rw.offset).RoundDown()
+ pgendaddr, _ := hostarch.Addr(end).RoundUp()
pgMR := memmap.MappableRange{uint64(pgstartaddr), uint64(pgendaddr)}
var done uint64
@@ -494,7 +495,7 @@ func (rw *fileReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error)
switch {
case seg.Ok():
// Get internal mappings.
- ims, err := mf.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), usermem.Write)
+ ims, err := mf.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), hostarch.Write)
if err != nil {
return done, err
}
@@ -527,7 +528,7 @@ func (rw *fileReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error)
}
// AddMapping implements memmap.Mappable.AddMapping.
-func (f *fileInodeOperations) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) error {
+func (f *fileInodeOperations) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error {
f.mapsMu.Lock()
defer f.mapsMu.Unlock()
@@ -544,7 +545,7 @@ func (f *fileInodeOperations) AddMapping(ctx context.Context, ms memmap.MappingS
pagesBefore := f.writableMappingPages
// ar is guaranteed to be page aligned per memmap.Mappable.
- f.writableMappingPages += uint64(ar.Length() / usermem.PageSize)
+ f.writableMappingPages += uint64(ar.Length() / hostarch.PageSize)
if f.writableMappingPages < pagesBefore {
panic(fmt.Sprintf("Overflow while mapping potentially writable pages pointing to a tmpfs file. Before %v, after %v", pagesBefore, f.writableMappingPages))
@@ -555,7 +556,7 @@ func (f *fileInodeOperations) AddMapping(ctx context.Context, ms memmap.MappingS
}
// RemoveMapping implements memmap.Mappable.RemoveMapping.
-func (f *fileInodeOperations) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) {
+func (f *fileInodeOperations) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) {
f.mapsMu.Lock()
defer f.mapsMu.Unlock()
@@ -565,7 +566,7 @@ func (f *fileInodeOperations) RemoveMapping(ctx context.Context, ms memmap.Mappi
pagesBefore := f.writableMappingPages
// ar is guaranteed to be page aligned per memmap.Mappable.
- f.writableMappingPages -= uint64(ar.Length() / usermem.PageSize)
+ f.writableMappingPages -= uint64(ar.Length() / hostarch.PageSize)
if f.writableMappingPages > pagesBefore {
panic(fmt.Sprintf("Underflow while unmapping potentially writable pages pointing to a tmpfs file. Before %v, after %v", pagesBefore, f.writableMappingPages))
@@ -574,12 +575,12 @@ func (f *fileInodeOperations) RemoveMapping(ctx context.Context, ms memmap.Mappi
}
// CopyMapping implements memmap.Mappable.CopyMapping.
-func (f *fileInodeOperations) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR usermem.AddrRange, offset uint64, writable bool) error {
+func (f *fileInodeOperations) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error {
return f.AddMapping(ctx, ms, dstAR, offset, writable)
}
// Translate implements memmap.Mappable.Translate.
-func (f *fileInodeOperations) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) {
+func (f *fileInodeOperations) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) {
f.dataMu.Lock()
defer f.dataMu.Unlock()
@@ -612,7 +613,7 @@ func (f *fileInodeOperations) Translate(ctx context.Context, required, optional
Source: segMR,
File: mf,
Offset: seg.FileRangeOf(segMR).Start,
- Perms: usermem.AnyAccess,
+ Perms: hostarch.AnyAccess,
})
translatedEnd = segMR.End
}
diff --git a/pkg/sentry/fs/tmpfs/tmpfs.go b/pkg/sentry/fs/tmpfs/tmpfs.go
index cf4ed5de0..577052888 100644
--- a/pkg/sentry/fs/tmpfs/tmpfs.go
+++ b/pkg/sentry/fs/tmpfs/tmpfs.go
@@ -20,6 +20,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
"gvisor.dev/gvisor/pkg/sentry/fs/ramfs"
@@ -28,7 +29,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
"gvisor.dev/gvisor/pkg/sentry/usage"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
var fsInfo = fs.Info{
@@ -41,8 +41,8 @@ var fsInfo = fs.Info{
// chosen to ensure that BlockSize * Blocks does not overflow int64 (which
// applications may also handle incorrectly).
// TODO(b/29637826): allow configuring a tmpfs size and enforce it.
- TotalBlocks: math.MaxInt64 / usermem.PageSize,
- FreeBlocks: math.MaxInt64 / usermem.PageSize,
+ TotalBlocks: math.MaxInt64 / hostarch.PageSize,
+ FreeBlocks: math.MaxInt64 / hostarch.PageSize,
}
// rename implements fs.InodeOperations.Rename for tmpfs nodes.
@@ -99,7 +99,7 @@ func NewDir(ctx context.Context, contents map[string]*fs.Inode, owner fs.FileOwn
return fs.NewInode(ctx, d, msrc, fs.StableAttr{
DeviceID: tmpfsDevice.DeviceID(),
InodeID: tmpfsDevice.NextIno(),
- BlockSize: usermem.PageSize,
+ BlockSize: hostarch.PageSize,
Type: fs.Directory,
})
}
@@ -232,7 +232,7 @@ func (d *Dir) newCreateOps() *ramfs.CreateOps {
return fs.NewInode(ctx, iops, dir.MountSource, fs.StableAttr{
DeviceID: tmpfsDevice.DeviceID(),
InodeID: tmpfsDevice.NextIno(),
- BlockSize: usermem.PageSize,
+ BlockSize: hostarch.PageSize,
Type: fs.RegularFile,
}), nil
},
@@ -281,7 +281,7 @@ func NewSymlink(ctx context.Context, target string, owner fs.FileOwner, msrc *fs
return fs.NewInode(ctx, s, msrc, fs.StableAttr{
DeviceID: tmpfsDevice.DeviceID(),
InodeID: tmpfsDevice.NextIno(),
- BlockSize: usermem.PageSize,
+ BlockSize: hostarch.PageSize,
Type: fs.Symlink,
})
}
@@ -311,7 +311,7 @@ func NewSocket(ctx context.Context, socket transport.BoundEndpoint, owner fs.Fil
return fs.NewInode(ctx, s, msrc, fs.StableAttr{
DeviceID: tmpfsDevice.DeviceID(),
InodeID: tmpfsDevice.NextIno(),
- BlockSize: usermem.PageSize,
+ BlockSize: hostarch.PageSize,
Type: fs.Socket,
})
}
@@ -348,7 +348,7 @@ func NewFifo(ctx context.Context, owner fs.FileOwner, perms fs.FilePermissions,
return fs.NewInode(ctx, fifoIops, msrc, fs.StableAttr{
DeviceID: tmpfsDevice.DeviceID(),
InodeID: tmpfsDevice.NextIno(),
- BlockSize: usermem.PageSize,
+ BlockSize: hostarch.PageSize,
Type: fs.Pipe,
})
}
diff --git a/pkg/sentry/fs/tty/BUILD b/pkg/sentry/fs/tty/BUILD
index e6d0eb359..86ada820e 100644
--- a/pkg/sentry/fs/tty/BUILD
+++ b/pkg/sentry/fs/tty/BUILD
@@ -17,6 +17,7 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/hostarch",
"//pkg/marshal/primitive",
"//pkg/refs",
"//pkg/safemem",
diff --git a/pkg/sentry/fs/tty/dir.go b/pkg/sentry/fs/tty/dir.go
index c2da80bc2..13c9dbe7d 100644
--- a/pkg/sentry/fs/tty/dir.go
+++ b/pkg/sentry/fs/tty/dir.go
@@ -22,6 +22,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
@@ -122,7 +123,7 @@ func newDir(ctx context.Context, m *fs.MountSource) *fs.Inode {
// TODO(b/75267214): Since ptsDevice must be shared between
// different mounts, we must not assign fixed numbers.
InodeID: ptsDevice.NextIno(),
- BlockSize: usermem.PageSize,
+ BlockSize: hostarch.PageSize,
Type: fs.Directory,
})
}
diff --git a/pkg/sentry/fsimpl/cgroupfs/BUILD b/pkg/sentry/fsimpl/cgroupfs/BUILD
new file mode 100644
index 000000000..48913068a
--- /dev/null
+++ b/pkg/sentry/fsimpl/cgroupfs/BUILD
@@ -0,0 +1,47 @@
+load("//tools:defs.bzl", "go_library")
+load("//tools/go_generics:defs.bzl", "go_template_instance")
+
+licenses(["notice"])
+
+go_template_instance(
+ name = "dir_refs",
+ out = "dir_refs.go",
+ package = "cgroupfs",
+ prefix = "dir",
+ template = "//pkg/refsvfs2:refs_template",
+ types = {
+ "T": "dir",
+ },
+)
+
+go_library(
+ name = "cgroupfs",
+ srcs = [
+ "base.go",
+ "cgroupfs.go",
+ "cpu.go",
+ "cpuacct.go",
+ "cpuset.go",
+ "dir_refs.go",
+ "memory.go",
+ ],
+ visibility = ["//pkg/sentry:internal"],
+ deps = [
+ "//pkg/abi/linux",
+ "//pkg/context",
+ "//pkg/coverage",
+ "//pkg/log",
+ "//pkg/refs",
+ "//pkg/refsvfs2",
+ "//pkg/sentry/arch",
+ "//pkg/sentry/fsimpl/kernfs",
+ "//pkg/sentry/kernel",
+ "//pkg/sentry/kernel/auth",
+ "//pkg/sentry/memmap",
+ "//pkg/sentry/usage",
+ "//pkg/sentry/vfs",
+ "//pkg/sync",
+ "//pkg/syserror",
+ "//pkg/usermem",
+ ],
+)
diff --git a/pkg/sentry/fsimpl/cgroupfs/base.go b/pkg/sentry/fsimpl/cgroupfs/base.go
new file mode 100644
index 000000000..39c1013e1
--- /dev/null
+++ b/pkg/sentry/fsimpl/cgroupfs/base.go
@@ -0,0 +1,233 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cgroupfs
+
+import (
+ "bytes"
+ "fmt"
+ "sort"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/usermem"
+)
+
+// controllerCommon implements kernel.CgroupController.
+//
+// Must call init before use.
+//
+// +stateify savable
+type controllerCommon struct {
+ ty kernel.CgroupControllerType
+ fs *filesystem
+}
+
+func (c *controllerCommon) init(ty kernel.CgroupControllerType, fs *filesystem) {
+ c.ty = ty
+ c.fs = fs
+}
+
+// Type implements kernel.CgroupController.Type.
+func (c *controllerCommon) Type() kernel.CgroupControllerType {
+ return kernel.CgroupControllerType(c.ty)
+}
+
+// HierarchyID implements kernel.CgroupController.HierarchyID.
+func (c *controllerCommon) HierarchyID() uint32 {
+ return c.fs.hierarchyID
+}
+
+// NumCgroups implements kernel.CgroupController.NumCgroups.
+func (c *controllerCommon) NumCgroups() uint64 {
+ return atomic.LoadUint64(&c.fs.numCgroups)
+}
+
+// Enabled implements kernel.CgroupController.Enabled.
+//
+// Controllers are currently always enabled.
+func (c *controllerCommon) Enabled() bool {
+ return true
+}
+
+// Filesystem implements kernel.CgroupController.Filesystem.
+func (c *controllerCommon) Filesystem() *vfs.Filesystem {
+ return c.fs.VFSFilesystem()
+}
+
+// RootCgroup implements kernel.CgroupController.RootCgroup.
+func (c *controllerCommon) RootCgroup() kernel.Cgroup {
+ return c.fs.rootCgroup()
+}
+
+// controller is an interface for common functionality related to all cgroups.
+// It is an extension of the public cgroup interface, containing cgroup
+// functionality private to cgroupfs.
+type controller interface {
+ kernel.CgroupController
+
+ // AddControlFiles should extend the contents map with inodes representing
+ // control files defined by this controller.
+ AddControlFiles(ctx context.Context, creds *auth.Credentials, c *cgroupInode, contents map[string]kernfs.Inode)
+}
+
+// cgroupInode implements kernel.CgroupImpl and kernfs.Inode.
+//
+// +stateify savable
+type cgroupInode struct {
+ dir
+ fs *filesystem
+
+ // ts is the list of tasks in this cgroup. The kernel is responsible for
+ // removing tasks from this list before they're destroyed, so any tasks on
+ // this list are always valid.
+ //
+ // ts, and cgroup membership in general is protected by fs.tasksMu.
+ ts map[*kernel.Task]struct{}
+}
+
+var _ kernel.CgroupImpl = (*cgroupInode)(nil)
+
+func (fs *filesystem) newCgroupInode(ctx context.Context, creds *auth.Credentials) kernfs.Inode {
+ c := &cgroupInode{
+ fs: fs,
+ ts: make(map[*kernel.Task]struct{}),
+ }
+
+ contents := make(map[string]kernfs.Inode)
+ contents["cgroup.procs"] = fs.newControllerFile(ctx, creds, &cgroupProcsData{c})
+ contents["tasks"] = fs.newControllerFile(ctx, creds, &tasksData{c})
+
+ for _, ctl := range fs.controllers {
+ ctl.AddControlFiles(ctx, creds, c, contents)
+ }
+
+ c.dir.InodeAttrs.Init(ctx, creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|linux.FileMode(0555))
+ c.dir.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
+ c.dir.InitRefs()
+ c.dir.IncLinks(c.dir.OrderedChildren.Populate(contents))
+
+ atomic.AddUint64(&fs.numCgroups, 1)
+
+ return c
+}
+
+func (c *cgroupInode) HierarchyID() uint32 {
+ return c.fs.hierarchyID
+}
+
+// Controllers implements kernel.CgroupImpl.Controllers.
+func (c *cgroupInode) Controllers() []kernel.CgroupController {
+ return c.fs.kcontrollers
+}
+
+// Enter implements kernel.CgroupImpl.Enter.
+func (c *cgroupInode) Enter(t *kernel.Task) {
+ c.fs.tasksMu.Lock()
+ c.ts[t] = struct{}{}
+ c.fs.tasksMu.Unlock()
+}
+
+// Leave implements kernel.CgroupImpl.Leave.
+func (c *cgroupInode) Leave(t *kernel.Task) {
+ c.fs.tasksMu.Lock()
+ delete(c.ts, t)
+ c.fs.tasksMu.Unlock()
+}
+
+func sortTIDs(tids []kernel.ThreadID) {
+ sort.Slice(tids, func(i, j int) bool { return tids[i] < tids[j] })
+}
+
+// +stateify savable
+type cgroupProcsData struct {
+ *cgroupInode
+}
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (d *cgroupProcsData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ t := kernel.TaskFromContext(ctx)
+ currPidns := t.ThreadGroup().PIDNamespace()
+
+ pgids := make(map[kernel.ThreadID]struct{})
+
+ d.fs.tasksMu.RLock()
+ defer d.fs.tasksMu.RUnlock()
+
+ for task := range d.ts {
+ // Map dedups pgid, since iterating over all tasks produces multiple
+ // entries for the group leaders.
+ if pgid := currPidns.IDOfThreadGroup(task.ThreadGroup()); pgid != 0 {
+ pgids[pgid] = struct{}{}
+ }
+ }
+
+ pgidList := make([]kernel.ThreadID, 0, len(pgids))
+ for pgid, _ := range pgids {
+ pgidList = append(pgidList, pgid)
+ }
+ sortTIDs(pgidList)
+
+ for _, pgid := range pgidList {
+ fmt.Fprintf(buf, "%d\n", pgid)
+ }
+
+ return nil
+}
+
+// Write implements vfs.WritableDynamicBytesSource.Write.
+func (d *cgroupProcsData) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) {
+ // TODO(b/183137098): Payload is the pid for a process to add to this cgroup.
+ return src.NumBytes(), nil
+}
+
+// +stateify savable
+type tasksData struct {
+ *cgroupInode
+}
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (d *tasksData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ t := kernel.TaskFromContext(ctx)
+ currPidns := t.ThreadGroup().PIDNamespace()
+
+ var pids []kernel.ThreadID
+
+ d.fs.tasksMu.RLock()
+ defer d.fs.tasksMu.RUnlock()
+
+ for task := range d.ts {
+ if pid := currPidns.IDOfTask(task); pid != 0 {
+ pids = append(pids, pid)
+ }
+ }
+ sortTIDs(pids)
+
+ for _, pid := range pids {
+ fmt.Fprintf(buf, "%d\n", pid)
+ }
+
+ return nil
+}
+
+// Write implements vfs.WritableDynamicBytesSource.Write.
+func (d *tasksData) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) {
+ // TODO(b/183137098): Payload is the pid for a process to add to this cgroup.
+ return src.NumBytes(), nil
+}
diff --git a/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go b/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go
new file mode 100644
index 000000000..ca8caee5f
--- /dev/null
+++ b/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go
@@ -0,0 +1,412 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package cgroupfs implements cgroupfs.
+//
+// A cgroup is a collection of tasks on the system, organized into a tree-like
+// structure similar to a filesystem directory tree. In fact, each cgroup is
+// represented by a directory on cgroupfs, and is manipulated through control
+// files in the directory.
+//
+// All cgroups on a system are organized into hierarchies. Each hierarchy is a
+// distinct tree of cgroups, with a common set of controllers. One or more
+// cgroupfs mounts may point to each hierarchy. These mounts provide a common
+// view into the same tree of cgroups.
+//
+// A controller (also known as a "resource controller", or a cgroup "subsystem")
+// determines the behaviour of each cgroup.
+//
+// In addition to cgroupfs, the kernel has a cgroup registry that tracks
+// system-wide state related to cgroups such as active hierarchies and the
+// controllers associated with them.
+//
+// Since cgroupfs doesn't allow hardlinks, there is a unique mapping between
+// cgroupfs dentries and inodes.
+//
+// # Synchronization
+//
+// Cgroup hierarchy creation and destruction is protected by the
+// kernel.CgroupRegistry.mu. Once created, a hierarchy's set of controllers, the
+// filesystem associated with it, and the root cgroup for the hierarchy are
+// immutable.
+//
+// Membership of tasks within cgroups is protected by
+// cgroupfs.filesystem.tasksMu. Tasks also maintain a set of all cgroups they're
+// in, and this list is protected by Task.mu.
+//
+// Lock order:
+//
+// kernel.CgroupRegistry.mu
+// cgroupfs.filesystem.mu
+// Task.mu
+// cgroupfs.filesystem.tasksMu.
+package cgroupfs
+
+import (
+ "fmt"
+ "sort"
+ "strconv"
+ "strings"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/sync"
+ "gvisor.dev/gvisor/pkg/syserror"
+)
+
+const (
+ // Name is the default filesystem name.
+ Name = "cgroup"
+ // readonlyFileMode is the mode given to files created by
+ // newControllerFile.
+ readonlyFileMode = linux.FileMode(0444)
+ // writableFileMode is the mode given to files created by
+ // newControllerWritableFile.
+ writableFileMode = linux.FileMode(0644)
+ // defaultMaxCachedDentries is the dentry cache limit used when the
+ // "dentry_cache_limit" mount option is not supplied.
+ defaultMaxCachedDentries = uint64(1000)
+)
+
+// Controller types known to cgroupfs. The string values double as the mount
+// option names that select each controller.
+const (
+ controllerCPU = kernel.CgroupControllerType("cpu")
+ controllerCPUAcct = kernel.CgroupControllerType("cpuacct")
+ controllerCPUSet = kernel.CgroupControllerType("cpuset")
+ controllerMemory = kernel.CgroupControllerType("memory")
+)
+
+// allControllers is the controller set used when a mount requests "all" or
+// names no controller at all.
+var allControllers = []kernel.CgroupControllerType{controllerCPU, controllerCPUAcct, controllerCPUSet, controllerMemory}
+
+// SupportedMountOptions is the set of supported mount options for cgroupfs.
+//
+// NOTE(review): GetFilesystem also accepts "dentry_cache_limit", which is not
+// listed here; confirm whether that omission is intentional.
+var SupportedMountOptions = []string{"all", "cpu", "cpuacct", "cpuset", "memory"}
+
+// FilesystemType implements vfs.FilesystemType.
+//
+// +stateify savable
+type FilesystemType struct{}
+
+// InternalData contains internal data passed in to the cgroupfs mount via
+// vfs.GetFilesystemOptions.InternalData.
+//
+// +stateify savable
+type InternalData struct {
+ // DefaultControlValues maps control file names (for example "cpu.shares"
+ // or "memory.limit_in_bytes") to the initial value for that control.
+ // Controller constructors delete the entries they consume from this map;
+ // GetFilesystem panics if any entry is left over once all requested
+ // controllers have been created.
+ DefaultControlValues map[string]int64
+}
+
+// filesystem implements vfs.FilesystemImpl.
+//
+// +stateify savable
+type filesystem struct {
+ kernfs.Filesystem
+ // devMinor is the anonymous block device minor number obtained from the
+ // VirtualFilesystem in GetFilesystem. It is returned in Release and is
+ // used as the device minor for the control files of this filesystem.
+ devMinor uint32
+
+ // hierarchyID is the id the cgroup registry assigns to this hierarchy. Has
+ // the value kernel.InvalidCgroupHierarchyID until the FS is fully
+ // initialized.
+ //
+ // hierarchyID is immutable after initialization.
+ hierarchyID uint32
+
+ // controllers and kcontrollers are both the list of controllers attached to
+ // this cgroupfs. Both lists are the same set of controllers, but typecast
+ // to different interfaces for convenience. Both must stay in sync, and are
+ // immutable.
+ controllers []controller
+ kcontrollers []kernel.CgroupController
+
+ numCgroups uint64 // Protected by atomic ops.
+
+ // root is the dentry of the root cgroup directory, set up in
+ // GetFilesystem.
+ root *kernfs.Dentry
+
+ // tasksMu serializes task membership changes across all cgroups within a
+ // filesystem.
+ tasksMu sync.RWMutex `state:"nosave"`
+}
+
+// Name implements vfs.FilesystemType.Name.
+//
+// It returns the package-level Name constant.
+func (FilesystemType) Name() string {
+ return Name
+}
+
+// Release implements vfs.FilesystemType.Release.
+//
+// It is a no-op.
+func (FilesystemType) Release(ctx context.Context) {}
+
+// GetFilesystem implements vfs.FilesystemType.GetFilesystem.
+//
+// The mount data selects the controllers for the hierarchy ("cpu", "cpuacct",
+// "cpuset", "memory" or "all"); naming no controller is equivalent to "all".
+// "dentry_cache_limit=N" overrides defaultMaxCachedDentries. An unparsable
+// dentry_cache_limit, "all" combined with an explicit controller, or any
+// unrecognized option fails with EINVAL.
+//
+// If the cgroup registry already has a hierarchy for the requested
+// controllers, the returned filesystem and root dentry are those of the
+// existing hierarchy. Otherwise a new hierarchy is created, registered and
+// populated with all existing tasks; EBUSY is returned if registration
+// fails.
+//
+// opts.InternalData, if non-nil, must be an *InternalData. Its default
+// control values are consumed by the controllers being created, and any
+// value left unconsumed causes a panic.
+func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) {
+	mopts := vfs.GenericParseMountOptions(opts.Data)
+	maxCachedDentries := defaultMaxCachedDentries
+	if str, ok := mopts["dentry_cache_limit"]; ok {
+		delete(mopts, "dentry_cache_limit")
+		limit, err := strconv.ParseUint(str, 10, 64)
+		if err != nil {
+			ctx.Warningf("cgroupfs.FilesystemType.GetFilesystem: invalid dentry cache limit: dentry_cache_limit=%s", str)
+			return nil, nil, syserror.EINVAL
+		}
+		maxCachedDentries = limit
+	}
+
+	var wantControllers []kernel.CgroupControllerType
+	if _, ok := mopts["cpu"]; ok {
+		delete(mopts, "cpu")
+		wantControllers = append(wantControllers, controllerCPU)
+	}
+	if _, ok := mopts["cpuacct"]; ok {
+		delete(mopts, "cpuacct")
+		wantControllers = append(wantControllers, controllerCPUAcct)
+	}
+	if _, ok := mopts["cpuset"]; ok {
+		delete(mopts, "cpuset")
+		wantControllers = append(wantControllers, controllerCPUSet)
+	}
+	if _, ok := mopts["memory"]; ok {
+		delete(mopts, "memory")
+		wantControllers = append(wantControllers, controllerMemory)
+	}
+	if _, ok := mopts["all"]; ok {
+		if len(wantControllers) > 0 {
+			ctx.Debugf("cgroupfs.FilesystemType.GetFilesystem: other controllers specified with all: %v", wantControllers)
+			return nil, nil, syserror.EINVAL
+		}
+
+		delete(mopts, "all")
+		wantControllers = allControllers
+	}
+
+	if len(wantControllers) == 0 {
+		// Specifying no controllers implies all controllers.
+		wantControllers = allControllers
+	}
+
+	if len(mopts) != 0 {
+		ctx.Debugf("cgroupfs.FilesystemType.GetFilesystem: unknown options: %v", mopts)
+		return nil, nil, syserror.EINVAL
+	}
+
+	k := kernel.KernelFromContext(ctx)
+	r := k.CgroupRegistry()
+
+	// "It is not possible to mount the same controller against multiple
+	// cgroup hierarchies. For example, it is not possible to mount both
+	// the cpu and cpuacct controllers against one hierarchy, and to mount
+	// the cpu controller alone against another hierarchy." - man cgroups(7)
+	//
+	// Is there a hierarchy available with all the controllers we want? If so,
+	// this mount is a view into the same hierarchy.
+	//
+	// Note: we're guaranteed to have at least one requested controller, since
+	// no explicit controller name implies all controllers.
+	if vfsfs := r.FindHierarchy(wantControllers); vfsfs != nil {
+		fs := vfsfs.Impl().(*filesystem)
+		ctx.Debugf("cgroupfs.FilesystemType.GetFilesystem: mounting new view to hierarchy %v", fs.hierarchyID)
+		fs.root.IncRef()
+		return vfsfs, fs.root.VFSDentry(), nil
+	}
+
+	// No existing hierarchy with exactly the requested controllers was found.
+	// Make a new one. Note that it's possible this mount creation is
+	// unsatisfiable, if one or more of the requested controllers are already
+	// on existing hierarchies. We'll find out about such collisions when we
+	// try to register the new hierarchy later.
+	//
+	// The device minor is only allocated now that a new filesystem is really
+	// needed: none of the early returns above would hand it back, whereas
+	// from here on it is owned by fs and returned by fs.Release.
+	devMinor, err := vfsObj.GetAnonBlockDevMinor()
+	if err != nil {
+		return nil, nil, err
+	}
+	fs := &filesystem{
+		devMinor: devMinor,
+	}
+	fs.MaxCachedDentries = maxCachedDentries
+	fs.VFSFilesystem().Init(vfsObj, &fsType, fs)
+
+	var defaults map[string]int64
+	if opts.InternalData != nil {
+		// Assign before logging so that the log shows the actual values.
+		defaults = opts.InternalData.(*InternalData).DefaultControlValues
+		ctx.Debugf("cgroupfs.FilesystemType.GetFilesystem: default control values: %v", defaults)
+	}
+
+	for _, ty := range wantControllers {
+		var c controller
+		switch ty {
+		case controllerMemory:
+			c = newMemoryController(fs, defaults)
+		case controllerCPU:
+			c = newCPUController(fs, defaults)
+		case controllerCPUAcct:
+			c = newCPUAcctController(fs)
+		case controllerCPUSet:
+			c = newCPUSetController(fs)
+		default:
+			panic(fmt.Sprintf("Unreachable: unknown cgroup controller %q", ty))
+		}
+		fs.controllers = append(fs.controllers, c)
+	}
+
+	if len(defaults) != 0 {
+		// Internal data is always provided at sentry startup and unused values
+		// indicate a problem with the sandbox config. Fail fast.
+		panic(fmt.Sprintf("cgroupfs.FilesystemType.GetFilesystem: unknown internal mount data: %v", defaults))
+	}
+
+	// Controllers usually appear in alphabetical order when displayed. Sort it
+	// here now, so it never needs to be sorted elsewhere.
+	sort.Slice(fs.controllers, func(i, j int) bool { return fs.controllers[i].Type() < fs.controllers[j].Type() })
+	fs.kcontrollers = make([]kernel.CgroupController, 0, len(fs.controllers))
+	for _, c := range fs.controllers {
+		fs.kcontrollers = append(fs.kcontrollers, c)
+	}
+
+	root := fs.newCgroupInode(ctx, creds)
+	var rootD kernfs.Dentry
+	rootD.InitRoot(&fs.Filesystem, root)
+	fs.root = &rootD
+
+	// Register controllers. The registry may be modified concurrently, so if we
+	// get an error, we raced with someone else who registered the same
+	// controllers first.
+	hid, err := r.Register(fs.kcontrollers)
+	if err != nil {
+		ctx.Infof("cgroupfs.FilesystemType.GetFilesystem: failed to register new hierarchy with controllers %v: %v", wantControllers, err)
+		rootD.DecRef(ctx)
+		fs.VFSFilesystem().DecRef(ctx)
+		return nil, nil, syserror.EBUSY
+	}
+	fs.hierarchyID = hid
+
+	// Move all existing tasks to the root of the new hierarchy.
+	k.PopulateNewCgroupHierarchy(fs.rootCgroup())
+
+	return fs.VFSFilesystem(), rootD.VFSDentry(), nil
+}
+
+// rootCgroup returns a kernel.Cgroup built from the filesystem's root dentry
+// and that dentry's inode. The inode must implement kernel.CgroupImpl; the
+// type assertion panics otherwise.
+func (fs *filesystem) rootCgroup() kernel.Cgroup {
+	impl := fs.root.Inode().(kernel.CgroupImpl)
+	return kernel.Cgroup{Dentry: fs.root, CgroupImpl: impl}
+}
+
+// Release implements vfs.FilesystemImpl.Release.
+//
+// If the filesystem was registered as a hierarchy, the hierarchy is released
+// from the kernel and unregistered from the cgroup registry. The device minor
+// is then handed back to the VirtualFilesystem and the embedded kernfs
+// filesystem is released.
+func (fs *filesystem) Release(ctx context.Context) {
+	kern := kernel.KernelFromContext(ctx)
+	registry := kern.CgroupRegistry()
+
+	if hid := fs.hierarchyID; hid != kernel.InvalidCgroupHierarchyID {
+		kern.ReleaseCgroupHierarchy(hid)
+		registry.Unregister(hid)
+	}
+
+	vfsObj := fs.Filesystem.VFSFilesystem().VirtualFilesystem()
+	vfsObj.PutAnonBlockDevMinor(fs.devMinor)
+	fs.Filesystem.Release(ctx)
+}
+
+// MountOptions implements vfs.FilesystemImpl.MountOptions.
+func (fs *filesystem) MountOptions() string {
+ var cnames []string
+ for _, c := range fs.controllers {
+ cnames = append(cnames, string(c.Type()))
+ }
+ return strings.Join(cnames, ",")
+}
+
+// implStatFS provides a StatFS implementation that can be embedded in
+// cgroupfs inode types.
+//
+// +stateify savable
+type implStatFS struct{}
+
+// StatFS implements kernfs.Inode.StatFS.
+//
+// It returns the generic statfs result for linux.CGROUP_SUPER_MAGIC and never
+// fails.
+func (*implStatFS) StatFS(context.Context, *vfs.Filesystem) (linux.Statfs, error) {
+ return vfs.GenericStatFS(linux.CGROUP_SUPER_MAGIC), nil
+}
+
+// dir implements kernfs.Inode for a generic cgroup resource controller
+// directory. Specific controllers extend this to add their own functionality.
+//
+// +stateify savable
+type dir struct {
+ // dirRefs holds the reference count that dir.DecRef operates on.
+ dirRefs
+ kernfs.InodeAlwaysValid
+ kernfs.InodeAttrs
+ kernfs.InodeNotSymlink
+ kernfs.InodeDirectoryNoNewChildren // TODO(b/183137098): Implement mkdir.
+ kernfs.OrderedChildren
+ // Note: dir also declares its own StatFS method, which takes precedence
+ // over the one promoted from implStatFS.
+ implStatFS
+
+ // locks is handed to the directory file descriptions created by Open.
+ locks vfs.FileLocks
+}
+
+// Keep implements kernfs.Inode.Keep.
+//
+// It always returns true.
+func (*dir) Keep() bool {
+ return true
+}
+
+// SetStat implements kernfs.Inode.SetStat not allowing inode attributes to be changed.
+//
+// It always returns EPERM.
+func (*dir) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error {
+ return syserror.EPERM
+}
+
+// Open implements kernfs.Inode.Open.
+//
+// It creates a generic kernfs directory FD over the directory's ordered
+// children, using the directory's file locks and SeekEndStaticEntries, and
+// returns its vfs.FileDescription.
+func (d *dir) Open(ctx context.Context, rp *vfs.ResolvingPath, kd *kernfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
+	fdOpts := kernfs.GenericDirectoryFDOptions{
+		SeekEnd: kernfs.SeekEndStaticEntries,
+	}
+	dirFD, err := kernfs.NewGenericDirectoryFD(rp.Mount(), kd, &d.OrderedChildren, &d.locks, &opts, fdOpts)
+	if err != nil {
+		return nil, err
+	}
+	return dirFD.VFSFileDescription(), nil
+}
+
+// DecRef implements kernfs.Inode.DecRef.
+//
+// It drops a reference on dirRefs, supplying d.Destroy as the callback.
+func (d *dir) DecRef(ctx context.Context) {
+	destroy := func() { d.Destroy(ctx) }
+	d.dirRefs.DecRef(destroy)
+}
+
+// StatFS implements kernfs.Inode.StatFS.
+//
+// It reports the generic statfs result for linux.CGROUP_SUPER_MAGIC by
+// delegating to the embedded implStatFS, which returns exactly that.
+func (d *dir) StatFS(ctx context.Context, fs *vfs.Filesystem) (linux.Statfs, error) {
+	return d.implStatFS.StatFS(ctx, fs)
+}
+
+// controllerFile represents a generic control file that appears within a cgroup
+// directory.
+//
+// Instances are created by newControllerFile (read-only mode) and
+// newControllerWritableFile (writable mode).
+//
+// +stateify savable
+type controllerFile struct {
+ kernfs.DynamicBytesFile
+}
+
+// newControllerFile returns a new control file inode backed by data, created
+// with readonlyFileMode, this filesystem's device minor and a freshly
+// allocated inode number.
+func (fs *filesystem) newControllerFile(ctx context.Context, creds *auth.Credentials, data vfs.DynamicBytesSource) kernfs.Inode {
+	file := new(controllerFile)
+	file.Init(ctx, creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), data, readonlyFileMode)
+	return file
+}
+
+// newControllerWritableFile returns a new control file inode backed by the
+// writable source data, created with writableFileMode, this filesystem's
+// device minor and a freshly allocated inode number.
+func (fs *filesystem) newControllerWritableFile(ctx context.Context, creds *auth.Credentials, data vfs.WritableDynamicBytesSource) kernfs.Inode {
+	file := new(controllerFile)
+	file.Init(ctx, creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), data, writableFileMode)
+	return file
+}
+
+// staticControllerFile represents a generic control file that appears within a
+// cgroup directory which always returns the same data when read.
+// staticControllerFiles are not writable.
+//
+// The file serves as its own data source: newStaticControllerFile passes the
+// file itself, with its embedded vfs.StaticData, as the source when
+// initializing the embedded DynamicBytesFile.
+//
+// +stateify savable
+type staticControllerFile struct {
+ kernfs.DynamicBytesFile
+ vfs.StaticData
+}
+
+// newStaticControllerFile returns a new control file inode whose contents are
+// always data.
+//
+// The mode is chosen by the caller so that static files can stand in for both
+// readable and writable control files. Static files are nevertheless
+// effectively read-only: attempting to write to them returns EIO regardless
+// of the mode.
+func (fs *filesystem) newStaticControllerFile(ctx context.Context, creds *auth.Credentials, mode linux.FileMode, data string) kernfs.Inode {
+	file := &staticControllerFile{
+		StaticData: vfs.StaticData{Data: data},
+	}
+	// The file is its own data source, via the embedded StaticData.
+	file.Init(ctx, creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), file, mode)
+	return file
+}
diff --git a/pkg/sentry/fsimpl/cgroupfs/cpu.go b/pkg/sentry/fsimpl/cgroupfs/cpu.go
new file mode 100644
index 000000000..24d86a277
--- /dev/null
+++ b/pkg/sentry/fsimpl/cgroupfs/cpu.go
@@ -0,0 +1,70 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cgroupfs
+
+import (
+ "fmt"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+)
+
+// cpuController is the "cpu" cgroup controller. It holds the values that are
+// published through the cpu.cfs_period_us, cpu.cfs_quota_us and cpu.shares
+// control files.
+//
+// +stateify savable
+type cpuController struct {
+ controllerCommon
+
+ // CFS bandwidth control parameters, values in microseconds.
+ cfsPeriod int64
+ cfsQuota int64
+
+ // CPU shares, values should be (num core * 1024).
+ shares int64
+}
+
+// Compile-time check that *cpuController implements controller.
+var _ controller = (*cpuController)(nil)
+
+// newCPUController creates the cpu controller for fs.
+//
+// Parameters start at the Linux defaults and are overridden by the
+// "cpu.cfs_period_us", "cpu.cfs_quota_us" and "cpu.shares" entries of
+// defaults, when present. Each entry that is used is deleted from defaults.
+func newCPUController(fs *filesystem, defaults map[string]int64) *cpuController {
+	// Default values for controller parameters from Linux.
+	c := &cpuController{
+		cfsPeriod: 100000,
+		cfsQuota:  -1,
+		shares:    1024,
+	}
+
+	// consume overrides *dst with defaults[key] if present, and removes the
+	// entry from defaults.
+	consume := func(key string, dst *int64) {
+		if v, found := defaults[key]; found {
+			*dst = v
+			delete(defaults, key)
+		}
+	}
+	consume("cpu.cfs_period_us", &c.cfsPeriod)
+	consume("cpu.cfs_quota_us", &c.cfsQuota)
+	consume("cpu.shares", &c.shares)
+
+	c.controllerCommon.init(controllerCPU, fs)
+	return c
+}
+
+// AddControlFiles implements controller.AddControlFiles.
+func (c *cpuController) AddControlFiles(ctx context.Context, creds *auth.Credentials, _ *cgroupInode, contents map[string]kernfs.Inode) {
+ contents["cpu.cfs_period_us"] = c.fs.newStaticControllerFile(ctx, creds, linux.FileMode(0644), fmt.Sprintf("%d\n", c.cfsPeriod))
+ contents["cpu.cfs_quota_us"] = c.fs.newStaticControllerFile(ctx, creds, linux.FileMode(0644), fmt.Sprintf("%d\n", c.cfsQuota))
+ contents["cpu.shares"] = c.fs.newStaticControllerFile(ctx, creds, linux.FileMode(0644), fmt.Sprintf("%d\n", c.shares))
+}
diff --git a/pkg/sentry/fsimpl/cgroupfs/cpuacct.go b/pkg/sentry/fsimpl/cgroupfs/cpuacct.go
new file mode 100644
index 000000000..d4104a00e
--- /dev/null
+++ b/pkg/sentry/fsimpl/cgroupfs/cpuacct.go
@@ -0,0 +1,114 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cgroupfs
+
+import (
+ "bytes"
+ "fmt"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+)
+
+// cpuacctController is the "cpuacct" cgroup controller. It carries no state
+// beyond controllerCommon.
+//
+// +stateify savable
+type cpuacctController struct {
+ controllerCommon
+}
+
+// Compile-time check that *cpuacctController implements controller.
+var _ controller = (*cpuacctController)(nil)
+
+// newCPUAcctController creates the cpuacct controller for fs.
+func newCPUAcctController(fs *filesystem) *cpuacctController {
+	ctl := new(cpuacctController)
+	ctl.controllerCommon.init(controllerCPUAcct, fs)
+	return ctl
+}
+
+// AddControlFiles implements controller.AddControlFiles.
+//
+// It adds the read-only cpuacct.stat, cpuacct.usage, cpuacct.usage_user and
+// cpuacct.usage_sys files, all of which report on the tasks of cg.
+func (c *cpuacctController) AddControlFiles(ctx context.Context, creds *auth.Credentials, cg *cgroupInode, contents map[string]kernfs.Inode) {
+	acct := &cpuacctCgroup{cgroupInode: cg}
+	fs := c.fs
+
+	contents["cpuacct.stat"] = fs.newControllerFile(ctx, creds, &cpuacctStatData{cpuacctCgroup: acct})
+	contents["cpuacct.usage"] = fs.newControllerFile(ctx, creds, &cpuacctUsageData{cpuacctCgroup: acct})
+	contents["cpuacct.usage_user"] = fs.newControllerFile(ctx, creds, &cpuacctUsageUserData{cpuacctCgroup: acct})
+	contents["cpuacct.usage_sys"] = fs.newControllerFile(ctx, creds, &cpuacctUsageSysData{cpuacctCgroup: acct})
+}
+
+// cpuacctCgroup wraps a cgroupInode to add CPU usage collection over the
+// tasks in that cgroup. It is shared by the cpuacct.* data sources.
+//
+// +stateify savable
+type cpuacctCgroup struct {
+ *cgroupInode
+}
+
+// collectCPUStats returns the sum of the CPU stats of every task in the
+// cgroup. The task set is read under the filesystem's tasksMu read lock.
+func (c *cpuacctCgroup) collectCPUStats() usage.CPUStats {
+	var total usage.CPUStats
+
+	mu := &c.fs.tasksMu
+	mu.RLock()
+	// Note: This isn't very accurate, since the tasks are potentially
+	// still running as we accumulate their stats.
+	for task := range c.ts {
+		total.Accumulate(task.CPUStats())
+	}
+	mu.RUnlock()
+
+	return total
+}
+
+// cpuacctStatData generates the contents of the cpuacct.stat control file.
+//
+// +stateify savable
+type cpuacctStatData struct {
+	*cpuacctCgroup
+}
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+//
+// It writes a "user" line and a "system" line holding the cgroup's
+// accumulated user and system CPU time, each converted with
+// linux.ClockTFromDuration.
+func (d *cpuacctStatData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+	stats := d.collectCPUStats()
+	userTicks := linux.ClockTFromDuration(stats.UserTime)
+	sysTicks := linux.ClockTFromDuration(stats.SysTime)
+	fmt.Fprintf(buf, "user %d\n", userTicks)
+	fmt.Fprintf(buf, "system %d\n", sysTicks)
+	return nil
+}
+
+// cpuacctUsageData generates the contents of the cpuacct.usage control file.
+//
+// +stateify savable
+type cpuacctUsageData struct {
+	*cpuacctCgroup
+}
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+//
+// It writes the cgroup's combined user and system CPU time in nanoseconds.
+func (d *cpuacctUsageData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+	stats := d.collectCPUStats()
+	totalNS := stats.UserTime.Nanoseconds() + stats.SysTime.Nanoseconds()
+	fmt.Fprintf(buf, "%d\n", totalNS)
+	return nil
+}
+
+// cpuacctUsageUserData generates the contents of the cpuacct.usage_user
+// control file.
+//
+// +stateify savable
+type cpuacctUsageUserData struct {
+	*cpuacctCgroup
+}
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+//
+// It writes the cgroup's accumulated user CPU time in nanoseconds.
+func (d *cpuacctUsageUserData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+	userNS := d.collectCPUStats().UserTime.Nanoseconds()
+	fmt.Fprintf(buf, "%d\n", userNS)
+	return nil
+}
+
+// cpuacctUsageSysData generates the contents of the cpuacct.usage_sys control
+// file.
+//
+// +stateify savable
+type cpuacctUsageSysData struct {
+	*cpuacctCgroup
+}
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+//
+// It writes the cgroup's accumulated system CPU time in nanoseconds.
+func (d *cpuacctUsageSysData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+	sysNS := d.collectCPUStats().SysTime.Nanoseconds()
+	fmt.Fprintf(buf, "%d\n", sysNS)
+	return nil
+}
diff --git a/pkg/sentry/fsimpl/cgroupfs/cpuset.go b/pkg/sentry/fsimpl/cgroupfs/cpuset.go
new file mode 100644
index 000000000..ac547f8e2
--- /dev/null
+++ b/pkg/sentry/fsimpl/cgroupfs/cpuset.go
@@ -0,0 +1,39 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cgroupfs
+
+import (
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+)
+
+// cpusetController is the "cpuset" cgroup controller. It carries no state
+// beyond controllerCommon and currently adds no control files.
+//
+// +stateify savable
+type cpusetController struct {
+ controllerCommon
+}
+
+// Compile-time check that *cpusetController implements controller.
+var _ controller = (*cpusetController)(nil)
+
+// newCPUSetController creates the cpuset controller for fs.
+func newCPUSetController(fs *filesystem) *cpusetController {
+	ctl := new(cpusetController)
+	ctl.controllerCommon.init(controllerCPUSet, fs)
+	return ctl
+}
+
+// AddControlFiles implements controller.AddControlFiles.
+//
+// It leaves contents untouched: this controller contributes no control files.
+func (c *cpusetController) AddControlFiles(ctx context.Context, creds *auth.Credentials, _ *cgroupInode, contents map[string]kernfs.Inode) {
+ // This controller is currently intentionally empty.
+}
diff --git a/pkg/sentry/fsimpl/cgroupfs/memory.go b/pkg/sentry/fsimpl/cgroupfs/memory.go
new file mode 100644
index 000000000..485c98376
--- /dev/null
+++ b/pkg/sentry/fsimpl/cgroupfs/memory.go
@@ -0,0 +1,74 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cgroupfs
+
+import (
+ "bytes"
+ "fmt"
+ "math"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+)
+
+// memoryController is the "memory" cgroup controller.
+//
+// +stateify savable
+type memoryController struct {
+ controllerCommon
+
+ // limitBytes is the value published through the memory.limit_in_bytes
+ // control file.
+ limitBytes int64
+}
+
+// Compile-time check that *memoryController implements controller.
+var _ controller = (*memoryController)(nil)
+
+func newMemoryController(fs *filesystem, defaults map[string]int64) *memoryController {
+ c := &memoryController{
+ // Linux sets this to (PAGE_COUNTER_MAX * PAGE_SIZE) by default, which
+ // is ~ 2**63 on a 64-bit system. So essentially, inifinity. The exact
+ // value isn't very important.
+ limitBytes: math.MaxInt64,
+ }
+ if val, ok := defaults["memory.limit_in_bytes"]; ok {
+ c.limitBytes = val
+ delete(defaults, "memory.limit_in_bytes")
+ }
+ c.controllerCommon.init(controllerMemory, fs)
+ return c
+}
+
+// AddControlFiles implements controller.AddControlFiles.
+//
+// It adds the read-only memory.usage_in_bytes file and a static
+// memory.limit_in_bytes file (mode 0644) holding the controller's limit
+// followed by a newline.
+func (c *memoryController) AddControlFiles(ctx context.Context, creds *auth.Credentials, _ *cgroupInode, contents map[string]kernfs.Inode) {
+	usageFile := c.fs.newControllerFile(ctx, creds, &memoryUsageInBytesData{})
+	contents["memory.usage_in_bytes"] = usageFile
+
+	limitText := fmt.Sprintf("%d\n", c.limitBytes)
+	contents["memory.limit_in_bytes"] = c.fs.newStaticControllerFile(ctx, creds, linux.FileMode(0644), limitText)
+}
+
+// memoryUsageInBytesData generates the contents of the memory.usage_in_bytes
+// control file.
+//
+// +stateify savable
+type memoryUsageInBytesData struct{}
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+//
+// It refreshes the kernel memory file's usage and then writes the total from
+// a copy of usage.MemoryAccounting.
+func (d *memoryUsageInBytesData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+	// TODO(b/183151557): This is a giant hack, we're using system-wide
+	// accounting since we know there is only one cgroup.
+	kernel.KernelFromContext(ctx).MemoryFile().UpdateUsage()
+
+	_, systemTotal := usage.MemoryAccounting.Copy()
+	fmt.Fprintf(buf, "%d\n", systemTotal)
+	return nil
+}
diff --git a/pkg/sentry/fsimpl/eventfd/BUILD b/pkg/sentry/fsimpl/eventfd/BUILD
index bcb01bb08..c09fdc7f9 100644
--- a/pkg/sentry/fsimpl/eventfd/BUILD
+++ b/pkg/sentry/fsimpl/eventfd/BUILD
@@ -10,6 +10,7 @@ go_library(
"//pkg/abi/linux",
"//pkg/context",
"//pkg/fdnotifier",
+ "//pkg/hostarch",
"//pkg/log",
"//pkg/sentry/vfs",
"//pkg/syserror",
diff --git a/pkg/sentry/fsimpl/eventfd/eventfd.go b/pkg/sentry/fsimpl/eventfd/eventfd.go
index 30bd05357..4f79cfcb7 100644
--- a/pkg/sentry/fsimpl/eventfd/eventfd.go
+++ b/pkg/sentry/fsimpl/eventfd/eventfd.go
@@ -23,6 +23,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fdnotifier"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
@@ -188,7 +189,7 @@ func (efd *EventFileDescription) read(ctx context.Context, dst usermem.IOSequenc
efd.queue.Notify(waiter.WritableEvents)
var buf [8]byte
- usermem.ByteOrder.PutUint64(buf[:], val)
+ hostarch.ByteOrder.PutUint64(buf[:], val)
_, err := dst.CopyOut(ctx, buf[:])
return err
}
@@ -196,7 +197,7 @@ func (efd *EventFileDescription) read(ctx context.Context, dst usermem.IOSequenc
// Preconditions: Must be called with efd.mu locked.
func (efd *EventFileDescription) hostWriteLocked(val uint64) error {
var buf [8]byte
- usermem.ByteOrder.PutUint64(buf[:], val)
+ hostarch.ByteOrder.PutUint64(buf[:], val)
_, err := unix.Write(efd.hostfd, buf[:])
if err == unix.EWOULDBLOCK {
return syserror.ErrWouldBlock
@@ -209,7 +210,7 @@ func (efd *EventFileDescription) write(ctx context.Context, src usermem.IOSequen
if _, err := src.CopyIn(ctx, buf[:]); err != nil {
return err
}
- val := usermem.ByteOrder.Uint64(buf[:])
+ val := hostarch.ByteOrder.Uint64(buf[:])
return efd.Signal(val)
}
diff --git a/pkg/sentry/fsimpl/fuse/BUILD b/pkg/sentry/fsimpl/fuse/BUILD
index 155c0f56d..3a4777fbe 100644
--- a/pkg/sentry/fsimpl/fuse/BUILD
+++ b/pkg/sentry/fsimpl/fuse/BUILD
@@ -46,6 +46,7 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/hostarch",
"//pkg/log",
"//pkg/marshal",
"//pkg/refs",
@@ -75,6 +76,7 @@ go_test(
library = ":fuse",
deps = [
"//pkg/abi/linux",
+ "//pkg/hostarch",
"//pkg/marshal",
"//pkg/sentry/fsimpl/testutil",
"//pkg/sentry/kernel",
diff --git a/pkg/sentry/fsimpl/fuse/read_write.go b/pkg/sentry/fsimpl/fuse/read_write.go
index 23ce91849..66ea889f9 100644
--- a/pkg/sentry/fsimpl/fuse/read_write.go
+++ b/pkg/sentry/fsimpl/fuse/read_write.go
@@ -20,11 +20,11 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
// ReadInPages sends FUSE_READ requests for the size after round it up to
@@ -43,10 +43,10 @@ func (fs *filesystem) ReadInPages(ctx context.Context, fd *regularFileFD, off ui
}
// Round up to a multiple of page size.
- readSize, _ := usermem.PageRoundUp(uint64(size))
+ readSize, _ := hostarch.PageRoundUp(uint64(size))
// One request cannnot exceed either maxRead or maxPages.
- maxPages := fs.conn.maxRead >> usermem.PageShift
+ maxPages := fs.conn.maxRead >> hostarch.PageShift
if maxPages > uint32(fs.conn.maxPages) {
maxPages = uint32(fs.conn.maxPages)
}
@@ -54,9 +54,9 @@ func (fs *filesystem) ReadInPages(ctx context.Context, fd *regularFileFD, off ui
var outs [][]byte
var sizeRead uint32
- // readSize is a multiple of usermem.PageSize.
+ // readSize is a multiple of hostarch.PageSize.
// Always request bytes as a multiple of pages.
- pagesRead, pagesToRead := uint32(0), uint32(readSize>>usermem.PageShift)
+ pagesRead, pagesToRead := uint32(0), uint32(readSize>>hostarch.PageShift)
// Reuse the same struct for unmarshalling to avoid unnecessary memory allocation.
in := linux.FUSEReadIn{
@@ -76,8 +76,8 @@ func (fs *filesystem) ReadInPages(ctx context.Context, fd *regularFileFD, off ui
pagesCanRead = maxPages
}
- in.Offset = off + (uint64(pagesRead) << usermem.PageShift)
- in.Size = pagesCanRead << usermem.PageShift
+ in.Offset = off + (uint64(pagesRead) << hostarch.PageShift)
+ in.Size = pagesCanRead << hostarch.PageShift
// TODO(gvisor.dev/issue/3247): support async read.
@@ -159,7 +159,7 @@ func (fs *filesystem) Write(ctx context.Context, fd *regularFileFD, off uint64,
}
// One request cannnot exceed either maxWrite or maxPages.
- maxWrite := uint32(fs.conn.maxPages) << usermem.PageShift
+ maxWrite := uint32(fs.conn.maxPages) << hostarch.PageShift
if maxWrite > fs.conn.maxWrite {
maxWrite = fs.conn.maxWrite
}
@@ -188,8 +188,8 @@ func (fs *filesystem) Write(ctx context.Context, fd *regularFileFD, off uint64,
// Limit the write size to one page.
// Note that the bigWrites flag is obsolete,
// latest libfuse always sets it on.
- if !fs.conn.bigWrites && toWrite > usermem.PageSize {
- toWrite = usermem.PageSize
+ if !fs.conn.bigWrites && toWrite > hostarch.PageSize {
+ toWrite = hostarch.PageSize
}
// Limit the write size to maxWrite.
diff --git a/pkg/sentry/fsimpl/fuse/request_response.go b/pkg/sentry/fsimpl/fuse/request_response.go
index 10fb9d7d2..8a72489fa 100644
--- a/pkg/sentry/fsimpl/fuse/request_response.go
+++ b/pkg/sentry/fsimpl/fuse/request_response.go
@@ -19,10 +19,10 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/marshal"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/usermem"
)
// fuseInitRes is a variable-length wrapper of linux.FUSEInitOut. The FUSE
@@ -45,29 +45,29 @@ func (r *fuseInitRes) UnmarshalBytes(src []byte) {
out := &r.initOut
// Introduced before FUSE kernel version 7.13.
- out.Major = uint32(usermem.ByteOrder.Uint32(src[:4]))
+ out.Major = uint32(hostarch.ByteOrder.Uint32(src[:4]))
src = src[4:]
- out.Minor = uint32(usermem.ByteOrder.Uint32(src[:4]))
+ out.Minor = uint32(hostarch.ByteOrder.Uint32(src[:4]))
src = src[4:]
- out.MaxReadahead = uint32(usermem.ByteOrder.Uint32(src[:4]))
+ out.MaxReadahead = uint32(hostarch.ByteOrder.Uint32(src[:4]))
src = src[4:]
- out.Flags = uint32(usermem.ByteOrder.Uint32(src[:4]))
+ out.Flags = uint32(hostarch.ByteOrder.Uint32(src[:4]))
src = src[4:]
- out.MaxBackground = uint16(usermem.ByteOrder.Uint16(src[:2]))
+ out.MaxBackground = uint16(hostarch.ByteOrder.Uint16(src[:2]))
src = src[2:]
- out.CongestionThreshold = uint16(usermem.ByteOrder.Uint16(src[:2]))
+ out.CongestionThreshold = uint16(hostarch.ByteOrder.Uint16(src[:2]))
src = src[2:]
- out.MaxWrite = uint32(usermem.ByteOrder.Uint32(src[:4]))
+ out.MaxWrite = uint32(hostarch.ByteOrder.Uint32(src[:4]))
src = src[4:]
// Introduced in FUSE kernel version 7.23.
if len(src) >= 4 {
- out.TimeGran = uint32(usermem.ByteOrder.Uint32(src[:4]))
+ out.TimeGran = uint32(hostarch.ByteOrder.Uint32(src[:4]))
src = src[4:]
}
// Introduced in FUSE kernel version 7.28.
if len(src) >= 2 {
- out.MaxPages = uint16(usermem.ByteOrder.Uint16(src[:2]))
+ out.MaxPages = uint16(hostarch.ByteOrder.Uint16(src[:2]))
src = src[2:]
}
_ = src // Remove unused warning.
diff --git a/pkg/sentry/fsimpl/fuse/utils_test.go b/pkg/sentry/fsimpl/fuse/utils_test.go
index 2c0cc0f4e..b0bab0066 100644
--- a/pkg/sentry/fsimpl/fuse/utils_test.go
+++ b/pkg/sentry/fsimpl/fuse/utils_test.go
@@ -24,7 +24,8 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/usermem"
+
+ "gvisor.dev/gvisor/pkg/hostarch"
)
func setup(t *testing.T) *testutil.System {
@@ -82,12 +83,12 @@ func (t *testPayload) SizeBytes() int {
// MarshalBytes implements marshal.Marshallable.MarshalBytes.
func (t *testPayload) MarshalBytes(dst []byte) {
- usermem.ByteOrder.PutUint32(dst[:4], t.data)
+ hostarch.ByteOrder.PutUint32(dst[:4], t.data)
}
// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes.
func (t *testPayload) UnmarshalBytes(src []byte) {
- *t = testPayload{data: usermem.ByteOrder.Uint32(src[:4])}
+ *t = testPayload{data: hostarch.ByteOrder.Uint32(src[:4])}
}
// Packed implements marshal.Marshallable.Packed.
@@ -106,17 +107,17 @@ func (t *testPayload) UnmarshalUnsafe(src []byte) {
}
// CopyOutN implements marshal.Marshallable.CopyOutN.
-func (t *testPayload) CopyOutN(task marshal.CopyContext, addr usermem.Addr, limit int) (int, error) {
+func (t *testPayload) CopyOutN(task marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) {
panic("not implemented")
}
// CopyOut implements marshal.Marshallable.CopyOut.
-func (t *testPayload) CopyOut(task marshal.CopyContext, addr usermem.Addr) (int, error) {
+func (t *testPayload) CopyOut(task marshal.CopyContext, addr hostarch.Addr) (int, error) {
panic("not implemented")
}
// CopyIn implements marshal.Marshallable.CopyIn.
-func (t *testPayload) CopyIn(task marshal.CopyContext, addr usermem.Addr) (int, error) {
+func (t *testPayload) CopyIn(task marshal.CopyContext, addr hostarch.Addr) (int, error) {
panic("not implemented")
}
diff --git a/pkg/sentry/fsimpl/gofer/BUILD b/pkg/sentry/fsimpl/gofer/BUILD
index 807b6ed1f..6d5258a9b 100644
--- a/pkg/sentry/fsimpl/gofer/BUILD
+++ b/pkg/sentry/fsimpl/gofer/BUILD
@@ -51,6 +51,7 @@ go_library(
"//pkg/fd",
"//pkg/fdnotifier",
"//pkg/fspath",
+ "//pkg/hostarch",
"//pkg/log",
"//pkg/p9",
"//pkg/refs",
diff --git a/pkg/sentry/fsimpl/gofer/directory.go b/pkg/sentry/fsimpl/gofer/directory.go
index 9da01cba3..177e42649 100644
--- a/pkg/sentry/fsimpl/gofer/directory.go
+++ b/pkg/sentry/fsimpl/gofer/directory.go
@@ -20,6 +20,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/p9"
"gvisor.dev/gvisor/pkg/refsvfs2"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
@@ -28,7 +29,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
func (d *dentry) isDir() bool {
@@ -98,7 +98,7 @@ func (d *dentry) createSyntheticChildLocked(opts *createSyntheticOpts) {
mode: uint32(opts.mode),
uid: uint32(opts.kuid),
gid: uint32(opts.kgid),
- blockSize: usermem.PageSize, // arbitrary
+ blockSize: hostarch.PageSize, // arbitrary
atime: now,
mtime: now,
ctime: now,
diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go
index 692da02c1..526136324 100644
--- a/pkg/sentry/fsimpl/gofer/gofer.go
+++ b/pkg/sentry/fsimpl/gofer/gofer.go
@@ -44,6 +44,7 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/p9"
refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
@@ -60,7 +61,6 @@ import (
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/unet"
- "gvisor.dev/gvisor/pkg/usermem"
)
// Name is the default filesystem name.
@@ -872,7 +872,7 @@ func (fs *filesystem) newDentry(ctx context.Context, file p9file, qid p9.QID, ma
mode: uint32(attr.Mode),
uid: uint32(fs.opts.dfltuid),
gid: uint32(fs.opts.dfltgid),
- blockSize: usermem.PageSize,
+ blockSize: hostarch.PageSize,
readFD: -1,
writeFD: -1,
mmapFD: -1,
@@ -1104,24 +1104,27 @@ func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, opts *vfs
defer d.metadataMu.Unlock()
// As with Linux, if the UID, GID, or file size is changing, we have to
- // clear permission bits. Note that when set, clearSGID causes
- // permissions to be updated, but does not modify stat.Mask, as
- // modification would cause an extra inotify flag to be set.
- clearSGID := stat.Mask&linux.STATX_UID != 0 && stat.UID != atomic.LoadUint32(&d.uid) ||
- stat.Mask&linux.STATX_GID != 0 && stat.GID != atomic.LoadUint32(&d.gid) ||
+ // clear permission bits. Note that when set, clearSGID may cause
+ // permissions to be updated.
+ clearSGID := (stat.Mask&linux.STATX_UID != 0 && stat.UID != atomic.LoadUint32(&d.uid)) ||
+ (stat.Mask&linux.STATX_GID != 0 && stat.GID != atomic.LoadUint32(&d.gid)) ||
stat.Mask&linux.STATX_SIZE != 0
if clearSGID {
if stat.Mask&linux.STATX_MODE != 0 {
stat.Mode = uint16(vfs.ClearSUIDAndSGID(uint32(stat.Mode)))
} else {
- stat.Mode = uint16(vfs.ClearSUIDAndSGID(atomic.LoadUint32(&d.mode)))
+ oldMode := atomic.LoadUint32(&d.mode)
+ if updatedMode := vfs.ClearSUIDAndSGID(oldMode); updatedMode != oldMode {
+ stat.Mode = uint16(updatedMode)
+ stat.Mask |= linux.STATX_MODE
+ }
}
}
if !d.isSynthetic() {
if stat.Mask != 0 {
if err := d.file.setAttr(ctx, p9.SetAttrMask{
- Permissions: stat.Mask&linux.STATX_MODE != 0 || clearSGID,
+ Permissions: stat.Mask&linux.STATX_MODE != 0,
UID: stat.Mask&linux.STATX_UID != 0,
GID: stat.Mask&linux.STATX_GID != 0,
Size: stat.Mask&linux.STATX_SIZE != 0,
@@ -1156,7 +1159,7 @@ func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, opts *vfs
return nil
}
}
- if stat.Mask&linux.STATX_MODE != 0 || clearSGID {
+ if stat.Mask&linux.STATX_MODE != 0 {
atomic.StoreUint32(&d.mode, d.fileType()|uint32(stat.Mode))
}
if stat.Mask&linux.STATX_UID != 0 {
@@ -1217,8 +1220,8 @@ func (d *dentry) updateSizeLocked(newSize uint64) {
// so we can't race with Write or another truncate.)
d.dataMu.Unlock()
if d.size < oldSize {
- oldpgend, _ := usermem.PageRoundUp(oldSize)
- newpgend, _ := usermem.PageRoundUp(d.size)
+ oldpgend, _ := hostarch.PageRoundUp(oldSize)
+ newpgend, _ := hostarch.PageRoundUp(d.size)
if oldpgend != newpgend {
d.mapsMu.Lock()
d.mappings.Invalidate(memmap.MappableRange{newpgend, oldpgend}, memmap.InvalidateOpts{
diff --git a/pkg/sentry/fsimpl/gofer/regular_file.go b/pkg/sentry/fsimpl/gofer/regular_file.go
index 4f1ad0c88..713f0a480 100644
--- a/pkg/sentry/fsimpl/gofer/regular_file.go
+++ b/pkg/sentry/fsimpl/gofer/regular_file.go
@@ -22,6 +22,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/p9"
"gvisor.dev/gvisor/pkg/safemem"
@@ -291,8 +292,8 @@ func (fd *regularFileFD) writeCache(ctx context.Context, d *dentry, offset int64
}
// Remove touched pages from the cache.
- pgstart := usermem.PageRoundDown(uint64(offset))
- pgend, ok := usermem.PageRoundUp(uint64(offset + src.NumBytes()))
+ pgstart := hostarch.PageRoundDown(uint64(offset))
+ pgend, ok := hostarch.PageRoundUp(uint64(offset + src.NumBytes()))
if !ok {
return syserror.EINVAL
}
@@ -408,7 +409,7 @@ func (rw *dentryReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error)
switch {
case seg.Ok():
// Get internal mappings from the cache.
- ims, err := mf.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), usermem.Read)
+ ims, err := mf.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), hostarch.Read)
if err != nil {
dataMuUnlock()
rw.d.handleMu.RUnlock()
@@ -434,9 +435,9 @@ func (rw *dentryReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error)
if fillCache {
// Read into the cache, then re-enter the loop to read from the
// cache.
- gapEnd, _ := usermem.PageRoundUp(gapMR.End)
+ gapEnd, _ := hostarch.PageRoundUp(gapMR.End)
reqMR := memmap.MappableRange{
- Start: usermem.PageRoundDown(gapMR.Start),
+ Start: hostarch.PageRoundDown(gapMR.Start),
End: gapEnd,
}
optMR := gap.Range()
@@ -527,7 +528,7 @@ func (rw *dentryReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, erro
case seg.Ok():
// Get internal mappings from the cache.
segMR := seg.Range().Intersect(mr)
- ims, err := mf.MapInternal(seg.FileRangeOf(segMR), usermem.Write)
+ ims, err := mf.MapInternal(seg.FileRangeOf(segMR), hostarch.Write)
if err != nil {
retErr = err
goto exitLoop
@@ -700,6 +701,7 @@ func (fd *regularFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpt
}
// After this point, d may be used as a memmap.Mappable.
d.pf.hostFileMapperInitOnce.Do(d.pf.hostFileMapper.Init)
+ opts.SentryOwnedContent = d.fs.opts.forcePageCache
return vfs.GenericConfigureMMap(&fd.vfsfd, d, opts)
}
@@ -714,7 +716,7 @@ func (d *dentry) mayCachePages() bool {
}
// AddMapping implements memmap.Mappable.AddMapping.
-func (d *dentry) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) error {
+func (d *dentry) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error {
d.mapsMu.Lock()
mapped := d.mappings.AddMapping(ms, ar, offset, writable)
// Do this unconditionally since whether we have a host FD can change
@@ -735,7 +737,7 @@ func (d *dentry) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar user
}
// RemoveMapping implements memmap.Mappable.RemoveMapping.
-func (d *dentry) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) {
+func (d *dentry) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) {
d.mapsMu.Lock()
unmapped := d.mappings.RemoveMapping(ms, ar, offset, writable)
for _, r := range unmapped {
@@ -759,12 +761,12 @@ func (d *dentry) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar u
}
// CopyMapping implements memmap.Mappable.CopyMapping.
-func (d *dentry) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR usermem.AddrRange, offset uint64, writable bool) error {
+func (d *dentry) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error {
return d.AddMapping(ctx, ms, dstAR, offset, writable)
}
// Translate implements memmap.Mappable.Translate.
-func (d *dentry) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) {
+func (d *dentry) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) {
d.handleMu.RLock()
if d.mmapFD >= 0 && !d.fs.opts.forcePageCache {
d.handleMu.RUnlock()
@@ -777,7 +779,7 @@ func (d *dentry) Translate(ctx context.Context, required, optional memmap.Mappab
Source: mr,
File: &d.pf,
Offset: mr.Start,
- Perms: usermem.AnyAccess,
+ Perms: hostarch.AnyAccess,
},
}, nil
}
@@ -786,7 +788,7 @@ func (d *dentry) Translate(ctx context.Context, required, optional memmap.Mappab
// Constrain translations to d.size (rounded up) to prevent translation to
// pages that may be concurrently truncated.
- pgend, _ := usermem.PageRoundUp(d.size)
+ pgend, _ := hostarch.PageRoundUp(d.size)
var beyondEOF bool
if required.End > pgend {
if required.Start >= pgend {
@@ -811,7 +813,7 @@ func (d *dentry) Translate(ctx context.Context, required, optional memmap.Mappab
segMR := seg.Range().Intersect(optional)
// TODO(jamieliu): Make Translations writable even if writability is
// not required if already kept-dirty by another writable translation.
- perms := usermem.AccessType{
+ perms := hostarch.AccessType{
Read: true,
Execute: true,
}
@@ -954,7 +956,7 @@ func (d *dentryPlatformFile) DecRef(fr memmap.FileRange) {
}
// MapInternal implements memmap.File.MapInternal.
-func (d *dentryPlatformFile) MapInternal(fr memmap.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) {
+func (d *dentryPlatformFile) MapInternal(fr memmap.FileRange, at hostarch.AccessType) (safemem.BlockSeq, error) {
d.handleMu.RLock()
defer d.handleMu.RUnlock()
return d.hostFileMapper.MapInternal(fr, int(d.mmapFD), at.Write)
diff --git a/pkg/sentry/fsimpl/gofer/save_restore.go b/pkg/sentry/fsimpl/gofer/save_restore.go
index c90071e4e..83e841a51 100644
--- a/pkg/sentry/fsimpl/gofer/save_restore.go
+++ b/pkg/sentry/fsimpl/gofer/save_restore.go
@@ -22,12 +22,12 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fdnotifier"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/p9"
"gvisor.dev/gvisor/pkg/refsvfs2"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
type saveRestoreContextID int
@@ -85,7 +85,7 @@ func (fs *filesystem) PrepareSave(ctx context.Context) error {
func (fd *specialFileFD) savePipeData(ctx context.Context) error {
fd.bufMu.Lock()
defer fd.bufMu.Unlock()
- var buf [usermem.PageSize]byte
+ var buf [hostarch.PageSize]byte
for {
n, err := fd.handle.readToBlocksAt(ctx, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf[:])), ^uint64(0))
if n != 0 {
diff --git a/pkg/sentry/fsimpl/host/BUILD b/pkg/sentry/fsimpl/host/BUILD
index 4ae9d6d5e..b94dfeb7f 100644
--- a/pkg/sentry/fsimpl/host/BUILD
+++ b/pkg/sentry/fsimpl/host/BUILD
@@ -47,6 +47,7 @@ go_library(
"//pkg/context",
"//pkg/fdnotifier",
"//pkg/fspath",
+ "//pkg/hostarch",
"//pkg/iovec",
"//pkg/log",
"//pkg/marshal/primitive",
diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go
index b9cce4181..3b90375b6 100644
--- a/pkg/sentry/fsimpl/host/host.go
+++ b/pkg/sentry/fsimpl/host/host.go
@@ -26,6 +26,7 @@ import (
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fdnotifier"
"gvisor.dev/gvisor/pkg/fspath"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/hostfd"
@@ -431,8 +432,8 @@ func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Cre
}
oldSize := uint64(hostStat.Size)
if s.Size < oldSize {
- oldpgend, _ := usermem.PageRoundUp(oldSize)
- newpgend, _ := usermem.PageRoundUp(s.Size)
+ oldpgend, _ := hostarch.PageRoundUp(oldSize)
+ newpgend, _ := hostarch.PageRoundUp(s.Size)
if oldpgend != newpgend {
i.CachedMappable.InvalidateRange(memmap.MappableRange{newpgend, oldpgend})
}
diff --git a/pkg/sentry/fsimpl/host/save_restore.go b/pkg/sentry/fsimpl/host/save_restore.go
index 5688bddc8..31301c715 100644
--- a/pkg/sentry/fsimpl/host/save_restore.go
+++ b/pkg/sentry/fsimpl/host/save_restore.go
@@ -21,9 +21,9 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/fdnotifier"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/hostfd"
- "gvisor.dev/gvisor/pkg/usermem"
)
// beforeSave is invoked by stateify.
@@ -38,7 +38,7 @@ func (i *inode) beforeSave() {
// EBADF from the read.
i.bufMu.Lock()
defer i.bufMu.Unlock()
- var buf [usermem.PageSize]byte
+ var buf [hostarch.PageSize]byte
for {
n, err := hostfd.Preadv2(int32(i.hostFD), safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf[:])), -1 /* offset */, 0 /* flags */)
if n != 0 {
diff --git a/pkg/sentry/fsimpl/kernfs/BUILD b/pkg/sentry/fsimpl/kernfs/BUILD
index 6dbc7e34d..b7d13cced 100644
--- a/pkg/sentry/fsimpl/kernfs/BUILD
+++ b/pkg/sentry/fsimpl/kernfs/BUILD
@@ -105,6 +105,7 @@ go_library(
"//pkg/abi/linux",
"//pkg/context",
"//pkg/fspath",
+ "//pkg/hostarch",
"//pkg/log",
"//pkg/refs",
"//pkg/refsvfs2",
diff --git a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go
index 65054b0ea..84b1c3745 100644
--- a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go
+++ b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go
@@ -25,8 +25,10 @@ import (
"gvisor.dev/gvisor/pkg/usermem"
)
-// DynamicBytesFile implements kernfs.Inode and represents a read-only
-// file whose contents are backed by a vfs.DynamicBytesSource.
+// DynamicBytesFile implements kernfs.Inode and represents a read-only file
+// whose contents are backed by a vfs.DynamicBytesSource. If data additionally
+// implements vfs.WritableDynamicBytesSource, the file also supports dispatching
+// writes to the implementer, but note that this will not update the source data.
//
// Must be instantiated with NewDynamicBytesFile or initialized with Init
// before first use.
@@ -40,7 +42,9 @@ type DynamicBytesFile struct {
InodeNotSymlink
locks vfs.FileLocks
- data vfs.DynamicBytesSource
+ // data can additionally implement vfs.WritableDynamicBytesSource to support
+ // writes.
+ data vfs.DynamicBytesSource
}
var _ Inode = (*DynamicBytesFile)(nil)
diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
index 6b890a39c..3d0866ecf 100644
--- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
+++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
@@ -20,12 +20,12 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
// InodeNoopRefCount partially implements the Inode interface, specifically the
@@ -206,7 +206,7 @@ func (a *InodeAttrs) Init(ctx context.Context, creds *auth.Credentials, devMajor
atomic.StoreUint32(&a.uid, uint32(creds.EffectiveKUID))
atomic.StoreUint32(&a.gid, uint32(creds.EffectiveKGID))
atomic.StoreUint32(&a.nlink, nlink)
- atomic.StoreUint32(&a.blockSize, usermem.PageSize)
+ atomic.StoreUint32(&a.blockSize, hostarch.PageSize)
now := ktime.NowFromContext(ctx).Nanoseconds()
atomic.StoreInt64(&a.atime, now)
atomic.StoreInt64(&a.mtime, now)
diff --git a/pkg/sentry/fsimpl/kernfs/kernfs.go b/pkg/sentry/fsimpl/kernfs/kernfs.go
index 565d723f0..16486eeae 100644
--- a/pkg/sentry/fsimpl/kernfs/kernfs.go
+++ b/pkg/sentry/fsimpl/kernfs/kernfs.go
@@ -61,6 +61,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/refsvfs2"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
@@ -508,6 +509,15 @@ func (d *Dentry) Inode() Inode {
return d.inode
}
+// FSLocalPath returns an absolute path to d, relative to the root of its
+// filesystem.
+func (d *Dentry) FSLocalPath() string {
+ var b fspath.Builder
+ _ = genericPrependPath(vfs.VirtualDentry{}, nil, d, &b)
+ b.PrependByte('/')
+ return b.String()
+}
+
// The Inode interface maps filesystem-level operations that operate on paths to
// equivalent operations on specific filesystem nodes.
//
diff --git a/pkg/sentry/fsimpl/kernfs/mmap_util.go b/pkg/sentry/fsimpl/kernfs/mmap_util.go
index bd6a134b4..d1539d904 100644
--- a/pkg/sentry/fsimpl/kernfs/mmap_util.go
+++ b/pkg/sentry/fsimpl/kernfs/mmap_util.go
@@ -16,11 +16,11 @@ package kernfs
import (
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/usermem"
)
// inodePlatformFile implements memmap.File. It exists solely because inode
@@ -66,7 +66,7 @@ func (i *inodePlatformFile) DecRef(fr memmap.FileRange) {
}
// MapInternal implements memmap.File.MapInternal.
-func (i *inodePlatformFile) MapInternal(fr memmap.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) {
+func (i *inodePlatformFile) MapInternal(fr memmap.FileRange, at hostarch.AccessType) (safemem.BlockSeq, error) {
return i.fileMapper.MapInternal(fr, i.hostFD, at.Write)
}
@@ -100,7 +100,7 @@ func (i *CachedMappable) Init(hostFD int) {
}
// AddMapping implements memmap.Mappable.AddMapping.
-func (i *CachedMappable) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) error {
+func (i *CachedMappable) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error {
i.mapsMu.Lock()
mapped := i.mappings.AddMapping(ms, ar, offset, writable)
for _, r := range mapped {
@@ -111,7 +111,7 @@ func (i *CachedMappable) AddMapping(ctx context.Context, ms memmap.MappingSpace,
}
// RemoveMapping implements memmap.Mappable.RemoveMapping.
-func (i *CachedMappable) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) {
+func (i *CachedMappable) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) {
i.mapsMu.Lock()
unmapped := i.mappings.RemoveMapping(ms, ar, offset, writable)
for _, r := range unmapped {
@@ -121,19 +121,19 @@ func (i *CachedMappable) RemoveMapping(ctx context.Context, ms memmap.MappingSpa
}
// CopyMapping implements memmap.Mappable.CopyMapping.
-func (i *CachedMappable) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR usermem.AddrRange, offset uint64, writable bool) error {
+func (i *CachedMappable) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error {
return i.AddMapping(ctx, ms, dstAR, offset, writable)
}
// Translate implements memmap.Mappable.Translate.
-func (i *CachedMappable) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) {
+func (i *CachedMappable) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) {
mr := optional
return []memmap.Translation{
{
Source: mr,
File: &i.pf,
Offset: mr.Start,
- Perms: usermem.AnyAccess,
+ Perms: hostarch.AnyAccess,
},
}, nil
}
diff --git a/pkg/sentry/fsimpl/overlay/BUILD b/pkg/sentry/fsimpl/overlay/BUILD
index bf13bbbf4..5504476c8 100644
--- a/pkg/sentry/fsimpl/overlay/BUILD
+++ b/pkg/sentry/fsimpl/overlay/BUILD
@@ -30,6 +30,7 @@ go_library(
"//pkg/abi/linux",
"//pkg/context",
"//pkg/fspath",
+ "//pkg/hostarch",
"//pkg/log",
"//pkg/refs",
"//pkg/refsvfs2",
diff --git a/pkg/sentry/fsimpl/overlay/copy_up.go b/pkg/sentry/fsimpl/overlay/copy_up.go
index 27b00cf6f..45aa5a494 100644
--- a/pkg/sentry/fsimpl/overlay/copy_up.go
+++ b/pkg/sentry/fsimpl/overlay/copy_up.go
@@ -21,11 +21,11 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fspath"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
func (d *dentry) isCopiedUp() bool {
@@ -138,8 +138,8 @@ func (d *dentry) copyUpLocked(ctx context.Context) error {
// We may have memory mappings of the file on the lower layer.
// Switch to mapping the file on the upper layer instead.
mmapOpts = &memmap.MMapOpts{
- Perms: usermem.ReadWrite,
- MaxPerms: usermem.ReadWrite,
+ Perms: hostarch.ReadWrite,
+ MaxPerms: hostarch.ReadWrite,
}
if err := newFD.ConfigureMMap(ctx, mmapOpts); err != nil {
cleanupUndoCopyUp()
diff --git a/pkg/sentry/fsimpl/overlay/regular_file.go b/pkg/sentry/fsimpl/overlay/regular_file.go
index d791c06db..43bfd69a3 100644
--- a/pkg/sentry/fsimpl/overlay/regular_file.go
+++ b/pkg/sentry/fsimpl/overlay/regular_file.go
@@ -19,6 +19,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
@@ -445,7 +446,7 @@ func (fd *regularFileFD) ensureMappable(ctx context.Context, opts *memmap.MMapOp
}
// AddMapping implements memmap.Mappable.AddMapping.
-func (d *dentry) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) error {
+func (d *dentry) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error {
d.mapsMu.Lock()
defer d.mapsMu.Unlock()
if err := d.wrappedMappable.AddMapping(ctx, ms, ar, offset, writable); err != nil {
@@ -458,7 +459,7 @@ func (d *dentry) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar user
}
// RemoveMapping implements memmap.Mappable.RemoveMapping.
-func (d *dentry) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) {
+func (d *dentry) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) {
d.mapsMu.Lock()
defer d.mapsMu.Unlock()
d.wrappedMappable.RemoveMapping(ctx, ms, ar, offset, writable)
@@ -468,7 +469,7 @@ func (d *dentry) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar u
}
// CopyMapping implements memmap.Mappable.CopyMapping.
-func (d *dentry) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR usermem.AddrRange, offset uint64, writable bool) error {
+func (d *dentry) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error {
d.mapsMu.Lock()
defer d.mapsMu.Unlock()
if err := d.wrappedMappable.CopyMapping(ctx, ms, srcAR, dstAR, offset, writable); err != nil {
@@ -481,7 +482,7 @@ func (d *dentry) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR,
}
// Translate implements memmap.Mappable.Translate.
-func (d *dentry) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) {
+func (d *dentry) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) {
d.dataMu.RLock()
defer d.dataMu.RUnlock()
return d.wrappedMappable.Translate(ctx, required, optional, at)
diff --git a/pkg/sentry/fsimpl/pipefs/BUILD b/pkg/sentry/fsimpl/pipefs/BUILD
index 5950a2d59..278ee3c92 100644
--- a/pkg/sentry/fsimpl/pipefs/BUILD
+++ b/pkg/sentry/fsimpl/pipefs/BUILD
@@ -10,12 +10,12 @@ go_library(
"//pkg/abi/linux",
"//pkg/context",
"//pkg/fspath",
+ "//pkg/hostarch",
"//pkg/sentry/fsimpl/kernfs",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/kernel/pipe",
"//pkg/sentry/kernel/time",
"//pkg/sentry/vfs",
"//pkg/syserror",
- "//pkg/usermem",
],
)
diff --git a/pkg/sentry/fsimpl/pipefs/pipefs.go b/pkg/sentry/fsimpl/pipefs/pipefs.go
index 3f05e444e..08aedc2ad 100644
--- a/pkg/sentry/fsimpl/pipefs/pipefs.go
+++ b/pkg/sentry/fsimpl/pipefs/pipefs.go
@@ -22,13 +22,13 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fspath"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/kernel/pipe"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
// +stateify savable
@@ -131,7 +131,7 @@ func (i *inode) Stat(_ context.Context, vfsfs *vfs.Filesystem, opts vfs.StatOpti
ts := linux.NsecToStatxTimestamp(i.ctime.Nanoseconds())
return linux.Statx{
Mask: linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_NLINK | linux.STATX_UID | linux.STATX_GID | linux.STATX_ATIME | linux.STATX_MTIME | linux.STATX_CTIME | linux.STATX_INO | linux.STATX_SIZE | linux.STATX_BLOCKS,
- Blksize: usermem.PageSize,
+ Blksize: hostarch.PageSize,
Nlink: 1,
UID: uint32(i.uid),
GID: uint32(i.gid),
diff --git a/pkg/sentry/fsimpl/proc/BUILD b/pkg/sentry/fsimpl/proc/BUILD
index d47a4fff9..2b628bd55 100644
--- a/pkg/sentry/fsimpl/proc/BUILD
+++ b/pkg/sentry/fsimpl/proc/BUILD
@@ -81,6 +81,7 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/hostarch",
"//pkg/log",
"//pkg/refs",
"//pkg/refsvfs2",
diff --git a/pkg/sentry/fsimpl/proc/filesystem.go b/pkg/sentry/fsimpl/proc/filesystem.go
index 254a8b062..ce8f55b1f 100644
--- a/pkg/sentry/fsimpl/proc/filesystem.go
+++ b/pkg/sentry/fsimpl/proc/filesystem.go
@@ -86,13 +86,13 @@ func (ft FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualF
procfs.MaxCachedDentries = maxCachedDentries
procfs.VFSFilesystem().Init(vfsObj, &ft, procfs)
- var cgroups map[string]string
+ var fakeCgroupControllers map[string]string
if opts.InternalData != nil {
data := opts.InternalData.(*InternalData)
- cgroups = data.Cgroups
+ fakeCgroupControllers = data.Cgroups
}
- inode := procfs.newTasksInode(ctx, k, pidns, cgroups)
+ inode := procfs.newTasksInode(ctx, k, pidns, fakeCgroupControllers)
var dentry kernfs.Dentry
dentry.InitRoot(&procfs.Filesystem, inode)
return procfs.VFSFilesystem(), dentry.VFSDentry(), nil
diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go
index fea138f93..d05cc1508 100644
--- a/pkg/sentry/fsimpl/proc/task.go
+++ b/pkg/sentry/fsimpl/proc/task.go
@@ -47,7 +47,7 @@ type taskInode struct {
var _ kernfs.Inode = (*taskInode)(nil)
-func (fs *filesystem) newTaskInode(ctx context.Context, task *kernel.Task, pidns *kernel.PIDNamespace, isThreadGroup bool, cgroupControllers map[string]string) (kernfs.Inode, error) {
+func (fs *filesystem) newTaskInode(ctx context.Context, task *kernel.Task, pidns *kernel.PIDNamespace, isThreadGroup bool, fakeCgroupControllers map[string]string) (kernfs.Inode, error) {
if task.ExitState() == kernel.TaskExitDead {
return nil, syserror.ESRCH
}
@@ -82,10 +82,12 @@ func (fs *filesystem) newTaskInode(ctx context.Context, task *kernel.Task, pidns
"uid_map": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0644, &idMapData{task: task, gids: false}),
}
if isThreadGroup {
- contents["task"] = fs.newSubtasks(ctx, task, pidns, cgroupControllers)
+ contents["task"] = fs.newSubtasks(ctx, task, pidns, fakeCgroupControllers)
}
- if len(cgroupControllers) > 0 {
- contents["cgroup"] = fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, newCgroupData(cgroupControllers))
+ if len(fakeCgroupControllers) > 0 {
+ contents["cgroup"] = fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, newFakeCgroupData(fakeCgroupControllers))
+ } else {
+ contents["cgroup"] = fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &taskCgroupData{task: task})
}
taskInode := &taskInode{task: task}
@@ -226,11 +228,14 @@ func newIO(t *kernel.Task, isThreadGroup bool) *ioData {
return &ioData{ioUsage: t}
}
-// newCgroupData creates inode that shows cgroup information.
-// From man 7 cgroups: "For each cgroup hierarchy of which the process is a
-// member, there is one entry containing three colon-separated fields:
-// hierarchy-ID:controller-list:cgroup-path"
-func newCgroupData(controllers map[string]string) dynamicInode {
+// newFakeCgroupData creates an inode that shows fake cgroup
+// information passed in as mount options. From man 7 cgroups: "For
+// each cgroup hierarchy of which the process is a member, there is
+// one entry containing three colon-separated fields:
+// hierarchy-ID:controller-list:cgroup-path"
+//
+// TODO(b/182488796): Remove once all users adopt cgroupfs.
+func newFakeCgroupData(controllers map[string]string) dynamicInode {
var buf bytes.Buffer
// The hierarchy ids must be positive integers (for cgroup v1), but the
diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go
index fdae163d1..b294dfd6a 100644
--- a/pkg/sentry/fsimpl/proc/task_files.go
+++ b/pkg/sentry/fsimpl/proc/task_files.go
@@ -21,6 +21,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/fsbridge"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
@@ -122,8 +123,8 @@ func (d *auxvData) Generate(ctx context.Context, buf *bytes.Buffer) error {
buf.Grow((len(auxv) + 1) * 16)
for _, e := range auxv {
var tmp [16]byte
- usermem.ByteOrder.PutUint64(tmp[:8], e.Key)
- usermem.ByteOrder.PutUint64(tmp[8:], uint64(e.Value))
+ hostarch.ByteOrder.PutUint64(tmp[:8], e.Key)
+ hostarch.ByteOrder.PutUint64(tmp[8:], uint64(e.Value))
buf.Write(tmp[:])
}
var atNull [16]byte
@@ -168,15 +169,15 @@ func (d *cmdlineData) Generate(ctx context.Context, buf *bytes.Buffer) error {
defer m.DecUsers(ctx)
// Figure out the bounds of the exec arg we are trying to read.
- var ar usermem.AddrRange
+ var ar hostarch.AddrRange
switch d.arg {
case cmdlineDataArg:
- ar = usermem.AddrRange{
+ ar = hostarch.AddrRange{
Start: m.ArgvStart(),
End: m.ArgvEnd(),
}
case environDataArg:
- ar = usermem.AddrRange{
+ ar = hostarch.AddrRange{
Start: m.EnvvStart(),
End: m.EnvvEnd(),
}
@@ -192,7 +193,7 @@ func (d *cmdlineData) Generate(ctx context.Context, buf *bytes.Buffer) error {
// until Linux 4.9 (272ddc8b3735 "proc: don't use FOLL_FORCE for reading
// cmdline and environment").
writer := &bufferWriter{buf: buf}
- if n, err := m.CopyInTo(ctx, usermem.AddrRangeSeqOf(ar), writer, usermem.IOOpts{}); n == 0 || err != nil {
+ if n, err := m.CopyInTo(ctx, hostarch.AddrRangeSeqOf(ar), writer, usermem.IOOpts{}); n == 0 || err != nil {
// Nothing to copy or something went wrong.
return err
}
@@ -209,7 +210,7 @@ func (d *cmdlineData) Generate(ctx context.Context, buf *bytes.Buffer) error {
}
// There is no NULL terminator in the string, return into envp.
- arEnvv := usermem.AddrRange{
+ arEnvv := hostarch.AddrRange{
Start: m.EnvvStart(),
End: m.EnvvEnd(),
}
@@ -218,11 +219,11 @@ func (d *cmdlineData) Generate(ctx context.Context, buf *bytes.Buffer) error {
// https://elixir.bootlin.com/linux/v4.20/source/fs/proc/base.c#L208
// we'll return one page total between argv and envp because of the
// above page restrictions.
- if buf.Len() >= usermem.PageSize {
+ if buf.Len() >= hostarch.PageSize {
// Returned at least one page already, nothing else to add.
return nil
}
- remaining := usermem.PageSize - buf.Len()
+ remaining := hostarch.PageSize - buf.Len()
if int(arEnvv.Length()) > remaining {
end, ok := arEnvv.Start.AddLength(uint64(remaining))
if !ok {
@@ -230,7 +231,7 @@ func (d *cmdlineData) Generate(ctx context.Context, buf *bytes.Buffer) error {
}
arEnvv.End = end
}
- if _, err := m.CopyInTo(ctx, usermem.AddrRangeSeqOf(arEnvv), writer, usermem.IOOpts{}); err != nil {
+ if _, err := m.CopyInTo(ctx, hostarch.AddrRangeSeqOf(arEnvv), writer, usermem.IOOpts{}); err != nil {
return err
}
@@ -323,7 +324,7 @@ func (d *idMapData) Write(ctx context.Context, src usermem.IOSequence, offset in
// the system page size, and the write must be performed at the start of
// the file ..." - user_namespaces(7)
srclen := src.NumBytes()
- if srclen >= usermem.PageSize || offset != 0 {
+ if srclen >= hostarch.PageSize || offset != 0 {
return 0, syserror.EINVAL
}
b := make([]byte, srclen)
@@ -481,7 +482,7 @@ func (fd *memFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64
defer m.DecUsers(ctx)
// Buffer the read data because of MM locks
buf := make([]byte, dst.NumBytes())
- n, readErr := m.CopyIn(ctx, usermem.Addr(offset), buf, usermem.IOOpts{IgnorePermissions: true})
+ n, readErr := m.CopyIn(ctx, hostarch.Addr(offset), buf, usermem.IOOpts{IgnorePermissions: true})
if n > 0 {
if _, err := dst.CopyOut(ctx, buf[:n]); err != nil {
return 0, syserror.EFAULT
@@ -613,7 +614,7 @@ func (s *taskStatData) Generate(ctx context.Context, buf *bytes.Buffer) error {
rss = mm.ResidentSetSize()
}
})
- fmt.Fprintf(buf, "%d %d ", vss, rss/usermem.PageSize)
+ fmt.Fprintf(buf, "%d %d ", vss, rss/hostarch.PageSize)
// rsslim.
fmt.Fprintf(buf, "%d ", s.task.ThreadGroup().Limits().Get(limits.Rss).Cur)
@@ -655,7 +656,7 @@ func (s *statmData) Generate(ctx context.Context, buf *bytes.Buffer) error {
}
})
- fmt.Fprintf(buf, "%d %d 0 0 0 0 0\n", vss/usermem.PageSize, rss/usermem.PageSize)
+ fmt.Fprintf(buf, "%d %d 0 0 0 0 0\n", vss/hostarch.PageSize, rss/hostarch.PageSize)
return nil
}
@@ -774,7 +775,7 @@ func (o *oomScoreAdj) Write(ctx context.Context, src usermem.IOSequence, offset
}
// Limit input size so as not to impact performance if input size is large.
- src = src.TakeFirst(usermem.PageSize - 1)
+ src = src.TakeFirst(hostarch.PageSize - 1)
var v int32
n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts)
@@ -1099,3 +1100,32 @@ func (fd *namespaceFD) SetStat(ctx context.Context, opts vfs.SetStatOptions) err
func (fd *namespaceFD) Release(ctx context.Context) {
fd.inode.DecRef(ctx)
}
+
+// taskCgroupData generates data for /proc/[pid]/cgroup.
+//
+// +stateify savable
+type taskCgroupData struct {
+ dynamicBytesFileSetAttr
+ task *kernel.Task
+}
+
+var _ dynamicInode = (*taskCgroupData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (d *taskCgroupData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ // When a task is exiting on Linux, a task's cgroup set is cleared and
+ // reset to the initial cgroup set, which is essentially the set of root
+ // cgroups. Because of this, the /proc/<pid>/cgroup file is always readable
+ // on Linux throughout a task's lifetime.
+ //
+ // The sentry removes tasks from cgroups during the exit process, but
+ // doesn't move them into an initial cgroup set, so partway through task
+ // exit this file would show a task is in no cgroups, which is incorrect. Instead,
+ // once a task has left its cgroups, we return an error.
+ if d.task.ExitState() >= kernel.TaskExitInitiated {
+ return syserror.ESRCH
+ }
+
+ d.task.GenerateProcTaskCgroup(buf)
+ return nil
+}
diff --git a/pkg/sentry/fsimpl/proc/task_net.go b/pkg/sentry/fsimpl/proc/task_net.go
index d4f6a5a9b..177cb828f 100644
--- a/pkg/sentry/fsimpl/proc/task_net.go
+++ b/pkg/sentry/fsimpl/proc/task_net.go
@@ -23,6 +23,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/inet"
@@ -34,7 +35,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/tcpip/header"
- "gvisor.dev/gvisor/pkg/usermem"
)
func (fs *filesystem) newTaskNetDir(ctx context.Context, task *kernel.Task) kernfs.Inode {
@@ -295,7 +295,7 @@ func networkToHost16(n uint16) uint16 {
// binary.BigEndian.Uint16() require a read of binary.BigEndian and an
// interface method call, defeating inlining.
buf := [2]byte{byte(n >> 8 & 0xff), byte(n & 0xff)}
- return usermem.ByteOrder.Uint16(buf[:])
+ return hostarch.ByteOrder.Uint16(buf[:])
}
func writeInetAddr(w io.Writer, family int, i linux.SockAddr) {
@@ -317,14 +317,14 @@ func writeInetAddr(w io.Writer, family int, i linux.SockAddr) {
// __be32 which is a typedef for an unsigned int, and is printed with
// %X. This means that for a little-endian machine, Linux prints the
// least-significant byte of the address first. To emulate this, we first
- // invert the byte order for the address using usermem.ByteOrder.Uint32,
+ // invert the byte order for the address using hostarch.ByteOrder.Uint32,
// which makes it have the equivalent encoding to a __be32 on a little
// endian machine. Note that this operation is a no-op on a big endian
// machine. Then similar to Linux, we format it with %X, which will print
// the most-significant byte of the __be32 address first, which is now
// actually the least-significant byte of the original address in
// linux.SockAddrInet.Addr on little endian machines, due to the conversion.
- addr := usermem.ByteOrder.Uint32(a.Addr[:])
+ addr := hostarch.ByteOrder.Uint32(a.Addr[:])
fmt.Fprintf(w, "%08X:%04X ", addr, port)
case linux.AF_INET6:
@@ -334,10 +334,10 @@ func writeInetAddr(w io.Writer, family int, i linux.SockAddr) {
}
port := networkToHost16(a.Port)
- addr0 := usermem.ByteOrder.Uint32(a.Addr[0:4])
- addr1 := usermem.ByteOrder.Uint32(a.Addr[4:8])
- addr2 := usermem.ByteOrder.Uint32(a.Addr[8:12])
- addr3 := usermem.ByteOrder.Uint32(a.Addr[12:16])
+ addr0 := hostarch.ByteOrder.Uint32(a.Addr[0:4])
+ addr1 := hostarch.ByteOrder.Uint32(a.Addr[4:8])
+ addr2 := hostarch.ByteOrder.Uint32(a.Addr[8:12])
+ addr3 := hostarch.ByteOrder.Uint32(a.Addr[12:16])
fmt.Fprintf(w, "%08X%08X%08X%08X:%04X ", addr0, addr1, addr2, addr3, port)
}
}
@@ -739,10 +739,10 @@ func (d *netRouteData) Generate(ctx context.Context, buf *bytes.Buffer) error {
)
if len(rt.GatewayAddr) == header.IPv4AddressSize {
flags |= linux.RTF_GATEWAY
- gw = usermem.ByteOrder.Uint32(rt.GatewayAddr)
+ gw = hostarch.ByteOrder.Uint32(rt.GatewayAddr)
}
if len(rt.DstAddr) == header.IPv4AddressSize {
- prefix = usermem.ByteOrder.Uint32(rt.DstAddr)
+ prefix = hostarch.ByteOrder.Uint32(rt.DstAddr)
}
l := fmt.Sprintf(
"%s\t%08X\t%08X\t%04X\t%d\t%d\t%d\t%08X\t%d\t%d\t%d",
diff --git a/pkg/sentry/fsimpl/proc/tasks.go b/pkg/sentry/fsimpl/proc/tasks.go
index fdc580610..7c7543f14 100644
--- a/pkg/sentry/fsimpl/proc/tasks.go
+++ b/pkg/sentry/fsimpl/proc/tasks.go
@@ -54,15 +54,15 @@ type tasksInode struct {
// '/proc/self' and '/proc/thread-self' have custom directory offsets in
// Linux. So handle them outside of OrderedChildren.
- // cgroupControllers is a map of controller name to directory in the
+ // fakeCgroupControllers is a map of controller name to directory in the
// cgroup hierarchy. These controllers are immutable and will be listed
// in /proc/pid/cgroup if not nil.
- cgroupControllers map[string]string
+ fakeCgroupControllers map[string]string
}
var _ kernfs.Inode = (*tasksInode)(nil)
-func (fs *filesystem) newTasksInode(ctx context.Context, k *kernel.Kernel, pidns *kernel.PIDNamespace, cgroupControllers map[string]string) *tasksInode {
+func (fs *filesystem) newTasksInode(ctx context.Context, k *kernel.Kernel, pidns *kernel.PIDNamespace, fakeCgroupControllers map[string]string) *tasksInode {
root := auth.NewRootCredentials(pidns.UserNamespace())
contents := map[string]kernfs.Inode{
"cpuinfo": fs.newInode(ctx, root, 0444, newStaticFileSetStat(cpuInfoData(k))),
@@ -76,11 +76,16 @@ func (fs *filesystem) newTasksInode(ctx context.Context, k *kernel.Kernel, pidns
"uptime": fs.newInode(ctx, root, 0444, &uptimeData{}),
"version": fs.newInode(ctx, root, 0444, &versionData{}),
}
+ // If fakeCgroupControllers are provided, don't create a cgroupfs backed
+ // /proc/cgroups as it will not match the fake controllers.
+ if len(fakeCgroupControllers) == 0 {
+ contents["cgroups"] = fs.newInode(ctx, root, 0444, &cgroupsData{})
+ }
inode := &tasksInode{
- pidns: pidns,
- fs: fs,
- cgroupControllers: cgroupControllers,
+ pidns: pidns,
+ fs: fs,
+ fakeCgroupControllers: fakeCgroupControllers,
}
inode.InodeAttrs.Init(ctx, root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0555)
inode.InitRefs()
@@ -118,7 +123,7 @@ func (i *tasksInode) Lookup(ctx context.Context, name string) (kernfs.Inode, err
return nil, syserror.ENOENT
}
- return i.fs.newTaskInode(ctx, task, i.pidns, true, i.cgroupControllers)
+ return i.fs.newTaskInode(ctx, task, i.pidns, true, i.fakeCgroupControllers)
}
// IterDirents implements kernfs.inodeDirectory.IterDirents.
diff --git a/pkg/sentry/fsimpl/proc/tasks_files.go b/pkg/sentry/fsimpl/proc/tasks_files.go
index 01b7a6678..e1a8b4409 100644
--- a/pkg/sentry/fsimpl/proc/tasks_files.go
+++ b/pkg/sentry/fsimpl/proc/tasks_files.go
@@ -21,6 +21,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
@@ -28,7 +29,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/usage"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
// +stateify savable
@@ -270,7 +270,7 @@ func (*meminfoData) Generate(ctx context.Context, buf *bytes.Buffer) error {
anon := snapshot.Anonymous + snapshot.Tmpfs
file := snapshot.PageCache + snapshot.Mapped
// We don't actually have active/inactive LRUs, so just make up numbers.
- activeFile := (file / 2) &^ (usermem.PageSize - 1)
+ activeFile := (file / 2) &^ (hostarch.PageSize - 1)
inactiveFile := file - activeFile
fmt.Fprintf(buf, "MemTotal: %8d kB\n", totalSize/1024)
@@ -384,3 +384,19 @@ func (d *filesystemsData) Generate(ctx context.Context, buf *bytes.Buffer) error
k.VFS().GenerateProcFilesystems(buf)
return nil
}
+
+// cgroupsData backs /proc/cgroups.
+//
+// +stateify savable
+type cgroupsData struct {
+ dynamicBytesFileSetAttr
+}
+
+var _ dynamicInode = (*cgroupsData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (*cgroupsData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ r := kernel.KernelFromContext(ctx).CgroupRegistry()
+ r.GenerateProcCgroups(buf)
+ return nil
+}
diff --git a/pkg/sentry/fsimpl/proc/tasks_sys.go b/pkg/sentry/fsimpl/proc/tasks_sys.go
index fb274b78e..9b14dd6b9 100644
--- a/pkg/sentry/fsimpl/proc/tasks_sys.go
+++ b/pkg/sentry/fsimpl/proc/tasks_sys.go
@@ -21,6 +21,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/inet"
"gvisor.dev/gvisor/pkg/sentry/kernel"
@@ -214,7 +215,7 @@ func (d *tcpSackData) Write(ctx context.Context, src usermem.IOSequence, offset
}
// Limit the amount of memory allocated.
- src = src.TakeFirst(usermem.PageSize - 1)
+ src = src.TakeFirst(hostarch.PageSize - 1)
var v int32
n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts)
@@ -262,7 +263,7 @@ func (d *tcpRecoveryData) Write(ctx context.Context, src usermem.IOSequence, off
}
// Limit the amount of memory allocated.
- src = src.TakeFirst(usermem.PageSize - 1)
+ src = src.TakeFirst(hostarch.PageSize - 1)
var v int32
n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts)
@@ -318,7 +319,7 @@ func (d *tcpMemData) Write(ctx context.Context, src usermem.IOSequence, offset i
defer d.mu.Unlock()
// Limit the amount of memory allocated.
- src = src.TakeFirst(usermem.PageSize - 1)
+ src = src.TakeFirst(hostarch.PageSize - 1)
size, err := d.readSizeLocked()
if err != nil {
return 0, err
@@ -406,7 +407,7 @@ func (ipf *ipForwarding) Write(ctx context.Context, src usermem.IOSequence, offs
}
// Limit input size so as not to impact performance if input size is large.
- src = src.TakeFirst(usermem.PageSize - 1)
+ src = src.TakeFirst(hostarch.PageSize - 1)
var v int32
n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts)
@@ -463,7 +464,7 @@ func (pr *portRange) Write(ctx context.Context, src usermem.IOSequence, offset i
// Limit input size so as not to impact performance if input size is
// large.
- src = src.TakeFirst(usermem.PageSize - 1)
+ src = src.TakeFirst(hostarch.PageSize - 1)
ports := make([]int32, 2)
n, err := usermem.CopyInt32StringsInVec(ctx, src.IO, src.Addrs, ports, src.Opts)
diff --git a/pkg/sentry/fsimpl/proc/yama.go b/pkg/sentry/fsimpl/proc/yama.go
index aebfe8944..e039ec45e 100644
--- a/pkg/sentry/fsimpl/proc/yama.go
+++ b/pkg/sentry/fsimpl/proc/yama.go
@@ -21,6 +21,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
@@ -62,7 +63,7 @@ func (s *yamaPtraceScope) Write(ctx context.Context, src usermem.IOSequence, off
}
// Limit the amount of memory allocated.
- src = src.TakeFirst(usermem.PageSize - 1)
+ src = src.TakeFirst(hostarch.PageSize - 1)
var v int32
n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts)
diff --git a/pkg/sentry/fsimpl/testutil/BUILD b/pkg/sentry/fsimpl/testutil/BUILD
index 400a97996..b3f9d1010 100644
--- a/pkg/sentry/fsimpl/testutil/BUILD
+++ b/pkg/sentry/fsimpl/testutil/BUILD
@@ -15,6 +15,7 @@ go_library(
"//pkg/context",
"//pkg/cpuid",
"//pkg/fspath",
+ "//pkg/hostarch",
"//pkg/memutil",
"//pkg/sentry/fsbridge",
"//pkg/sentry/fsimpl/tmpfs",
diff --git a/pkg/sentry/fsimpl/testutil/testutil.go b/pkg/sentry/fsimpl/testutil/testutil.go
index 1a8525b06..59e6f9c92 100644
--- a/pkg/sentry/fsimpl/testutil/testutil.go
+++ b/pkg/sentry/fsimpl/testutil/testutil.go
@@ -30,6 +30,8 @@ import (
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/usermem"
+
+ "gvisor.dev/gvisor/pkg/hostarch"
)
// System represents the context for a single test.
@@ -105,7 +107,7 @@ func (s *System) Destroy() {
// ReadToEnd reads the contents of fd until EOF to a string.
func (s *System) ReadToEnd(fd *vfs.FileDescription) (string, error) {
- buf := make([]byte, usermem.PageSize)
+ buf := make([]byte, hostarch.PageSize)
bufIOSeq := usermem.BytesIOSequence(buf)
opts := vfs.ReadOptions{}
diff --git a/pkg/sentry/fsimpl/timerfd/BUILD b/pkg/sentry/fsimpl/timerfd/BUILD
index fbb02a271..7ce7dc429 100644
--- a/pkg/sentry/fsimpl/timerfd/BUILD
+++ b/pkg/sentry/fsimpl/timerfd/BUILD
@@ -8,6 +8,7 @@ go_library(
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/context",
+ "//pkg/hostarch",
"//pkg/sentry/kernel/time",
"//pkg/sentry/vfs",
"//pkg/syserror",
diff --git a/pkg/sentry/fsimpl/timerfd/timerfd.go b/pkg/sentry/fsimpl/timerfd/timerfd.go
index 64d33c3a8..cbb8b67c5 100644
--- a/pkg/sentry/fsimpl/timerfd/timerfd.go
+++ b/pkg/sentry/fsimpl/timerfd/timerfd.go
@@ -19,6 +19,7 @@ import (
"sync/atomic"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
@@ -72,7 +73,7 @@ func (tfd *TimerFileDescription) Read(ctx context.Context, dst usermem.IOSequenc
}
if val := atomic.SwapUint64(&tfd.val, 0); val != 0 {
var buf [sizeofUint64]byte
- usermem.ByteOrder.PutUint64(buf[:], val)
+ hostarch.ByteOrder.PutUint64(buf[:], val)
if _, err := dst.CopyOut(ctx, buf[:]); err != nil {
// Linux does not undo consuming the number of
// expirations even if writing to userspace fails.
diff --git a/pkg/sentry/fsimpl/tmpfs/BUILD b/pkg/sentry/fsimpl/tmpfs/BUILD
index 09957c2b7..e21fddd7f 100644
--- a/pkg/sentry/fsimpl/tmpfs/BUILD
+++ b/pkg/sentry/fsimpl/tmpfs/BUILD
@@ -59,6 +59,7 @@ go_library(
"//pkg/amutex",
"//pkg/context",
"//pkg/fspath",
+ "//pkg/hostarch",
"//pkg/log",
"//pkg/refs",
"//pkg/refsvfs2",
diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file.go b/pkg/sentry/fsimpl/tmpfs/regular_file.go
index a6d161882..c45bddff6 100644
--- a/pkg/sentry/fsimpl/tmpfs/regular_file.go
+++ b/pkg/sentry/fsimpl/tmpfs/regular_file.go
@@ -22,6 +22,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
@@ -224,7 +225,7 @@ func (rf *regularFile) truncateLocked(newSize uint64) (bool, error) {
}
// AddMapping implements memmap.Mappable.AddMapping.
-func (rf *regularFile) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) error {
+func (rf *regularFile) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error {
rf.mapsMu.Lock()
defer rf.mapsMu.Unlock()
rf.dataMu.RLock()
@@ -240,7 +241,7 @@ func (rf *regularFile) AddMapping(ctx context.Context, ms memmap.MappingSpace, a
pagesBefore := rf.writableMappingPages
// ar is guaranteed to be page aligned per memmap.Mappable.
- rf.writableMappingPages += uint64(ar.Length() / usermem.PageSize)
+ rf.writableMappingPages += uint64(ar.Length() / hostarch.PageSize)
if rf.writableMappingPages < pagesBefore {
panic(fmt.Sprintf("Overflow while mapping potentially writable pages pointing to a tmpfs file. Before %v, after %v", pagesBefore, rf.writableMappingPages))
@@ -251,7 +252,7 @@ func (rf *regularFile) AddMapping(ctx context.Context, ms memmap.MappingSpace, a
}
// RemoveMapping implements memmap.Mappable.RemoveMapping.
-func (rf *regularFile) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) {
+func (rf *regularFile) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) {
rf.mapsMu.Lock()
defer rf.mapsMu.Unlock()
@@ -261,7 +262,7 @@ func (rf *regularFile) RemoveMapping(ctx context.Context, ms memmap.MappingSpace
pagesBefore := rf.writableMappingPages
// ar is guaranteed to be page aligned per memmap.Mappable.
- rf.writableMappingPages -= uint64(ar.Length() / usermem.PageSize)
+ rf.writableMappingPages -= uint64(ar.Length() / hostarch.PageSize)
if rf.writableMappingPages > pagesBefore {
panic(fmt.Sprintf("Underflow while unmapping potentially writable pages pointing to a tmpfs file. Before %v, after %v", pagesBefore, rf.writableMappingPages))
@@ -270,12 +271,12 @@ func (rf *regularFile) RemoveMapping(ctx context.Context, ms memmap.MappingSpace
}
// CopyMapping implements memmap.Mappable.CopyMapping.
-func (rf *regularFile) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR usermem.AddrRange, offset uint64, writable bool) error {
+func (rf *regularFile) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error {
return rf.AddMapping(ctx, ms, dstAR, offset, writable)
}
// Translate implements memmap.Mappable.Translate.
-func (rf *regularFile) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) {
+func (rf *regularFile) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) {
rf.dataMu.Lock()
defer rf.dataMu.Unlock()
@@ -307,7 +308,7 @@ func (rf *regularFile) Translate(ctx context.Context, required, optional memmap.
Source: segMR,
File: rf.memFile,
Offset: seg.FileRangeOf(segMR).Start,
- Perms: usermem.AnyAccess,
+ Perms: hostarch.AnyAccess,
})
translatedEnd = segMR.End
}
@@ -487,6 +488,7 @@ func (fd *regularFileFD) Seek(ctx context.Context, offset int64, whence int32) (
// ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap.
func (fd *regularFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error {
file := fd.inode().impl.(*regularFile)
+ opts.SentryOwnedContent = true
return vfs.GenericConfigureMMap(&fd.vfsfd, file, opts)
}
@@ -539,7 +541,7 @@ func (rw *regularFileReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, er
switch {
case seg.Ok():
// Get internal mappings.
- ims, err := rw.file.memFile.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), usermem.Read)
+ ims, err := rw.file.memFile.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), hostarch.Read)
if err != nil {
return done, err
}
@@ -608,7 +610,7 @@ func (rw *regularFileReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64,
//
// See Linux, mm/filemap.c:generic_perform_write() and
// mm/shmem.c:shmem_write_begin().
- if pgstart := uint64(usermem.Addr(rw.file.size).RoundDown()); end > pgstart {
+ if pgstart := uint64(hostarch.Addr(rw.file.size).RoundDown()); end > pgstart {
end = pgstart
}
if end <= rw.off {
@@ -619,8 +621,8 @@ func (rw *regularFileReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64,
// Page-aligned mr for when we need to allocate memory. RoundUp can't
// overflow since end is an int64.
- pgstartaddr := usermem.Addr(rw.off).RoundDown()
- pgendaddr, _ := usermem.Addr(end).RoundUp()
+ pgstartaddr := hostarch.Addr(rw.off).RoundDown()
+ pgendaddr, _ := hostarch.Addr(end).RoundUp()
pgMR := memmap.MappableRange{uint64(pgstartaddr), uint64(pgendaddr)}
var (
@@ -633,7 +635,7 @@ func (rw *regularFileReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64,
switch {
case seg.Ok():
// Get internal mappings.
- ims, err := rw.file.memFile.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), usermem.Write)
+ ims, err := rw.file.memFile.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), hostarch.Write)
if err != nil {
retErr = err
goto exitLoop
diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
index 8df81f589..9ae25ce9e 100644
--- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go
+++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
@@ -36,6 +36,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/sentry/pgalloc"
@@ -43,7 +44,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/vfs/memxattr"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
// Name is the default filesystem name.
@@ -252,8 +252,8 @@ func (d *dentry) releaseChildrenLocked(ctx context.Context) {
// immutable
var globalStatfs = linux.Statfs{
Type: linux.TMPFS_MAGIC,
- BlockSize: usermem.PageSize,
- FragmentSize: usermem.PageSize,
+ BlockSize: hostarch.PageSize,
+ FragmentSize: hostarch.PageSize,
NameLength: linux.NAME_MAX,
// tmpfs currently does not support configurable size limits. In Linux,
@@ -263,9 +263,9 @@ var globalStatfs = linux.Statfs{
// chosen to ensure that BlockSize * Blocks does not overflow int64 (which
// applications may also handle incorrectly).
// TODO(b/29637826): allow configuring a tmpfs size and enforce it.
- Blocks: math.MaxInt64 / usermem.PageSize,
- BlocksFree: math.MaxInt64 / usermem.PageSize,
- BlocksAvailable: math.MaxInt64 / usermem.PageSize,
+ Blocks: math.MaxInt64 / hostarch.PageSize,
+ BlocksFree: math.MaxInt64 / hostarch.PageSize,
+ BlocksAvailable: math.MaxInt64 / hostarch.PageSize,
}
// dentry implements vfs.DentryImpl.
@@ -485,7 +485,7 @@ func (i *inode) statTo(stat *linux.Statx) {
linux.STATX_UID | linux.STATX_GID | linux.STATX_INO | linux.STATX_SIZE |
linux.STATX_BLOCKS | linux.STATX_ATIME | linux.STATX_CTIME |
linux.STATX_MTIME
- stat.Blksize = usermem.PageSize
+ stat.Blksize = hostarch.PageSize
stat.Nlink = atomic.LoadUint32(&i.nlink)
stat.UID = atomic.LoadUint32(&i.uid)
stat.GID = atomic.LoadUint32(&i.gid)
diff --git a/pkg/sentry/fsimpl/verity/BUILD b/pkg/sentry/fsimpl/verity/BUILD
index e265be0ee..d473a922d 100644
--- a/pkg/sentry/fsimpl/verity/BUILD
+++ b/pkg/sentry/fsimpl/verity/BUILD
@@ -14,13 +14,16 @@ go_library(
"//pkg/abi/linux",
"//pkg/context",
"//pkg/fspath",
+ "//pkg/hostarch",
"//pkg/marshal/primitive",
"//pkg/merkletree",
"//pkg/refsvfs2",
+ "//pkg/safemem",
"//pkg/sentry/arch",
"//pkg/sentry/fs/lock",
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/auth",
+ "//pkg/sentry/memmap",
"//pkg/sentry/socket/unix/transport",
"//pkg/sentry/vfs",
"//pkg/sync",
diff --git a/pkg/sentry/fsimpl/verity/verity.go b/pkg/sentry/fsimpl/verity/verity.go
index 0d9b0ee2c..6d6e0e77a 100644
--- a/pkg/sentry/fsimpl/verity/verity.go
+++ b/pkg/sentry/fsimpl/verity/verity.go
@@ -34,6 +34,8 @@
package verity
import (
+ "bytes"
+ "encoding/hex"
"encoding/json"
"fmt"
"math"
@@ -44,13 +46,16 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fspath"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/marshal/primitive"
"gvisor.dev/gvisor/pkg/merkletree"
"gvisor.dev/gvisor/pkg/refsvfs2"
+ "gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/arch"
fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserror"
@@ -101,6 +106,13 @@ var (
verityMu sync.RWMutex
)
+// Mount option names for verityfs.
+const (
+ moptLowerPath = "lower_path"
+ moptRootHash = "root_hash"
+ moptRootName = "root_name"
+)
+
// HashAlgorithm is a type specifying the algorithm used to hash the file
// content.
type HashAlgorithm int
@@ -167,6 +179,9 @@ type filesystem struct {
// system.
alg HashAlgorithm
+ // opts is the mount options string that was passed in via opts.Data.
+ opts string
+
// renameMu synchronizes renaming with non-renaming operations in order
// to ensure consistent lock ordering between dentry.dirMu in different
// dentries.
@@ -189,9 +204,6 @@ type filesystem struct {
//
// +stateify savable
type InternalFilesystemOptions struct {
- // RootMerkleFileName is the name of the verity root Merkle tree file.
- RootMerkleFileName string
-
// LowerName is the name of the filesystem wrapped by verity fs.
LowerName string
@@ -199,9 +211,6 @@ type InternalFilesystemOptions struct {
// system.
Alg HashAlgorithm
- // RootHash is the root hash of the overall verity file system.
- RootHash []byte
-
// AllowRuntimeEnable specifies whether the verity file system allows
// enabling verification for files (i.e. building Merkle trees) during
// runtime.
@@ -235,28 +244,99 @@ func alertIntegrityViolation(msg string) error {
// GetFilesystem implements vfs.FilesystemType.GetFilesystem.
func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) {
+ mopts := vfs.GenericParseMountOptions(opts.Data)
+ var rootHash []byte
+ if encodedRootHash, ok := mopts[moptRootHash]; ok {
+ delete(mopts, moptRootHash)
+ hash, err := hex.DecodeString(encodedRootHash)
+ if err != nil {
+ ctx.Warningf("verity.FilesystemType.GetFilesystem: Failed to decode root hash: %v", err)
+ return nil, nil, syserror.EINVAL
+ }
+ rootHash = hash
+ }
+ var lowerPathname string
+ if path, ok := mopts[moptLowerPath]; ok {
+ delete(mopts, moptLowerPath)
+ lowerPathname = path
+ }
+ rootName := "root"
+ if root, ok := mopts[moptRootName]; ok {
+ delete(mopts, moptRootName)
+ rootName = root
+ }
+
+ // Check for unparsed options.
+ if len(mopts) != 0 {
+ ctx.Warningf("verity.FilesystemType.GetFilesystem: unknown options: %v", mopts)
+ return nil, nil, syserror.EINVAL
+ }
+
+ // Handle internal options.
iopts, ok := opts.InternalData.(InternalFilesystemOptions)
- if !ok {
+ if len(lowerPathname) == 0 && !ok {
ctx.Warningf("verity.FilesystemType.GetFilesystem: missing verity configs")
return nil, nil, syserror.EINVAL
}
+ if len(lowerPathname) != 0 {
+ if ok {
+ ctx.Warningf("verity.FilesystemType.GetFilesystem: unexpected verity configs with specified lower path")
+ return nil, nil, syserror.EINVAL
+ }
+ iopts = InternalFilesystemOptions{
+ AllowRuntimeEnable: len(rootHash) == 0,
+ Action: ErrorOnViolation,
+ }
+ }
action = iopts.Action
- // Mount the lower file system. The lower file system is wrapped inside
- // verity, and should not be exposed or connected.
- mopts := &vfs.MountOptions{
- GetFilesystemOptions: iopts.LowerGetFSOptions,
- InternalMount: true,
- }
- mnt, err := vfsObj.MountDisconnected(ctx, creds, "", iopts.LowerName, mopts)
- if err != nil {
- return nil, nil, err
+ var lowerMount *vfs.Mount
+ var mountedLowerVD vfs.VirtualDentry
+ // Use an existing mount if lowerPath is provided.
+ if len(lowerPathname) != 0 {
+ vfsroot := vfs.RootFromContext(ctx)
+ if vfsroot.Ok() {
+ defer vfsroot.DecRef(ctx)
+ }
+ lowerPath := fspath.Parse(lowerPathname)
+ if !lowerPath.Absolute {
+ ctx.Infof("verity.FilesystemType.GetFilesystem: lower_path %q must be absolute", lowerPathname)
+ return nil, nil, syserror.EINVAL
+ }
+ var err error
+ mountedLowerVD, err = vfsObj.GetDentryAt(ctx, creds, &vfs.PathOperation{
+ Root: vfsroot,
+ Start: vfsroot,
+ Path: lowerPath,
+ FollowFinalSymlink: true,
+ }, &vfs.GetDentryOptions{
+ CheckSearchable: true,
+ })
+ if err != nil {
+ ctx.Infof("verity.FilesystemType.GetFilesystem: failed to resolve lower_path %q: %v", lowerPathname, err)
+ return nil, nil, err
+ }
+ lowerMount = mountedLowerVD.Mount()
+ defer mountedLowerVD.DecRef(ctx)
+ } else {
+ // Mount the lower file system. The lower file system is wrapped inside
+ // verity, and should not be exposed or connected.
+ mountOpts := &vfs.MountOptions{
+ GetFilesystemOptions: iopts.LowerGetFSOptions,
+ InternalMount: true,
+ }
+ mnt, err := vfsObj.MountDisconnected(ctx, creds, "", iopts.LowerName, mountOpts)
+ if err != nil {
+ return nil, nil, err
+ }
+ lowerMount = mnt
}
fs := &filesystem{
creds: creds.Fork(),
alg: iopts.Alg,
- lowerMount: mnt,
+ lowerMount: lowerMount,
+ opts: opts.Data,
allowRuntimeEnable: iopts.AllowRuntimeEnable,
}
fs.vfsfs.Init(vfsObj, &fstype, fs)
@@ -264,11 +344,11 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
// Construct the root dentry.
d := fs.newDentry()
d.refs = 1
- lowerVD := vfs.MakeVirtualDentry(mnt, mnt.Root())
+ lowerVD := vfs.MakeVirtualDentry(lowerMount, lowerMount.Root())
lowerVD.IncRef()
d.lowerVD = lowerVD
- rootMerkleName := merkleRootPrefix + iopts.RootMerkleFileName
+ rootMerkleName := merkleRootPrefix + rootName
lowerMerkleVD, err := vfsObj.GetDentryAt(ctx, fs.creds, &vfs.PathOperation{
Root: lowerVD,
@@ -348,7 +428,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
d.mode = uint32(stat.Mode)
d.uid = stat.UID
d.gid = stat.GID
- d.hash = make([]byte, len(iopts.RootHash))
+ d.hash = make([]byte, len(rootHash))
d.childrenNames = make(map[string]struct{})
if !d.isDir() {
@@ -423,7 +503,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
}
d.hashMu.Lock()
- copy(d.hash, iopts.RootHash)
+ copy(d.hash, rootHash)
d.hashMu.Unlock()
d.vfsd.Init(d)
@@ -439,7 +519,7 @@ func (fs *filesystem) Release(ctx context.Context) {
// MountOptions implements vfs.FilesystemImpl.MountOptions.
func (fs *filesystem) MountOptions() string {
- return ""
+ return fs.opts
}
// dentry implements vfs.DentryImpl.
@@ -720,6 +800,10 @@ type fileDescription struct {
// underlying file system.
lowerFD *vfs.FileDescription
+ // lowerMappable is the memmap.Mappable corresponding to this file in the
+ // underlying file system.
+ lowerMappable memmap.Mappable
+
// merkleReader is the read-only FileDescription corresponding to the
// Merkle tree file in the underlying file system.
merkleReader *vfs.FileDescription
@@ -1033,7 +1117,7 @@ func (fd *fileDescription) enableVerity(ctx context.Context) (uintptr, error) {
}
// measureVerity returns the hash of fd, saved in verityDigest.
-func (fd *fileDescription) measureVerity(ctx context.Context, verityDigest usermem.Addr) (uintptr, error) {
+func (fd *fileDescription) measureVerity(ctx context.Context, verityDigest hostarch.Addr) (uintptr, error) {
t := kernel.TaskFromContext(ctx)
if t == nil {
return 0, syserror.EINVAL
@@ -1072,11 +1156,11 @@ func (fd *fileDescription) measureVerity(ctx context.Context, verityDigest userm
}
// Now copy the root hash bytes to the memory after metadata.
- _, err := t.CopyOutBytes(usermem.Addr(uintptr(verityDigest)+linux.SizeOfDigestMetadata), fd.d.hash)
+ _, err := t.CopyOutBytes(hostarch.Addr(uintptr(verityDigest)+linux.SizeOfDigestMetadata), fd.d.hash)
return 0, err
}
-func (fd *fileDescription) verityFlags(ctx context.Context, flags usermem.Addr) (uintptr, error) {
+func (fd *fileDescription) verityFlags(ctx context.Context, flags hostarch.Addr) (uintptr, error) {
f := int32(0)
fd.d.hashMu.RLock()
@@ -1199,6 +1283,24 @@ func (fd *fileDescription) Write(ctx context.Context, src usermem.IOSequence, op
return 0, syserror.EROFS
}
+// ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap.
+func (fd *fileDescription) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error {
+ if err := fd.lowerFD.ConfigureMMap(ctx, opts); err != nil {
+ return err
+ }
+ fd.lowerMappable = opts.Mappable
+ if opts.MappingIdentity != nil {
+ opts.MappingIdentity.DecRef(ctx)
+ opts.MappingIdentity = nil
+ }
+
+ // Check if mmap is allowed on the lower filesystem.
+ if !opts.SentryOwnedContent {
+ return syserror.ENODEV
+ }
+ return vfs.GenericConfigureMMap(&fd.vfsfd, fd, opts)
+}
+
// LockBSD implements vfs.FileDescriptionImpl.LockBSD.
func (fd *fileDescription) LockBSD(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, block fslock.Blocker) error {
return fd.lowerFD.LockBSD(ctx, ownerPID, t, block)
@@ -1224,6 +1326,115 @@ func (fd *fileDescription) TestPOSIX(ctx context.Context, uid fslock.UniqueID, t
return fd.lowerFD.TestPOSIX(ctx, uid, t, r)
}
+// Translate implements memmap.Mappable.Translate.
+func (fd *fileDescription) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) {
+ ts, err := fd.lowerMappable.Translate(ctx, required, optional, at)
+ if err != nil {
+ return ts, err
+ }
+
+ // dataSize is the size of the whole file.
+ dataSize, err := fd.merkleReader.GetXattr(ctx, &vfs.GetXattrOptions{
+ Name: merkleSizeXattr,
+ Size: sizeOfStringInt32,
+ })
+
+ // The Merkle tree file for the child should have been created and
+ // contains the expected xattrs. If the xattr does not exist, it
+ // indicates unexpected modifications to the file system.
+ if err == syserror.ENODATA {
+ return ts, alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s: %v", merkleSizeXattr, err))
+ }
+ if err != nil {
+ return ts, err
+ }
+
+ // The dataSize xattr should be an integer. If it's not, it indicates
+ // unexpected modifications to the file system.
+ size, err := strconv.Atoi(dataSize)
+ if err != nil {
+ return ts, alertIntegrityViolation(fmt.Sprintf("Failed to convert xattr %s to int: %v", merkleSizeXattr, err))
+ }
+
+ merkleReader := FileReadWriteSeeker{
+ FD: fd.merkleReader,
+ Ctx: ctx,
+ }
+
+ for _, t := range ts {
+ // Content integrity relies on sentry owning the backing data. MapInternal is guaranteed
+ // to fetch sentry owned memory because we disallow verity mmaps otherwise.
+ ims, err := t.File.MapInternal(memmap.FileRange{t.Offset, t.Offset + t.Source.Length()}, hostarch.Read)
+ if err != nil {
+ return nil, err
+ }
+ dataReader := mmapReadSeeker{ims, t.Source.Start}
+ var buf bytes.Buffer
+ _, err = merkletree.Verify(&merkletree.VerifyParams{
+ Out: &buf,
+ File: &dataReader,
+ Tree: &merkleReader,
+ Size: int64(size),
+ Name: fd.d.name,
+ Mode: fd.d.mode,
+ UID: fd.d.uid,
+ GID: fd.d.gid,
+ HashAlgorithms: fd.d.fs.alg.toLinuxHashAlg(),
+ ReadOffset: int64(t.Source.Start),
+ ReadSize: int64(t.Source.Length()),
+ Expected: fd.d.hash,
+ DataAndTreeInSameFile: false,
+ })
+ if err != nil {
+ return ts, alertIntegrityViolation(fmt.Sprintf("Verification failed: %v", err))
+ }
+ }
+ return ts, err
+}
+
+// AddMapping implements memmap.Mappable.AddMapping.
+func (fd *fileDescription) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error {
+ return fd.lowerMappable.AddMapping(ctx, ms, ar, offset, writable)
+}
+
+// RemoveMapping implements memmap.Mappable.RemoveMapping.
+func (fd *fileDescription) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) {
+ fd.lowerMappable.RemoveMapping(ctx, ms, ar, offset, writable)
+}
+
+// CopyMapping implements memmap.Mappable.CopyMapping.
+func (fd *fileDescription) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error {
+ return fd.lowerMappable.CopyMapping(ctx, ms, srcAR, dstAR, offset, writable)
+}
+
+// InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable.
+func (fd *fileDescription) InvalidateUnsavable(context.Context) error {
+ return nil
+}
+
+// mmapReadSeeker is a helper struct used by fileDescription.Translate to pass
+// a safemem.BlockSeq pointing to the mapped region as io.ReaderAt.
+type mmapReadSeeker struct {
+ safemem.BlockSeq
+ Offset uint64
+}
+
+// ReadAt implements io.ReaderAt.ReadAt. off is the offset into the mapped file.
+func (r *mmapReadSeeker) ReadAt(p []byte, off int64) (int, error) {
+ // Adjust the offset into the mapped file to get the offset into the internally
+ // mapped region.
+ readOffset := off - int64(r.Offset)
+ if readOffset < 0 {
+ return 0, syserror.EINVAL
+ }
+ // DropFirst64, like TakeFirst64, yields a new BlockSeq instead of mutating
+ // its receiver, so the skip only takes effect if its result is used.
+ view := r.BlockSeq.DropFirst64(uint64(readOffset)).TakeFirst64(uint64(len(p)))
+ dst := safemem.BlockSeqOf(safemem.BlockFromSafeSlice(p))
+ n, err := safemem.CopySeq(dst, view)
+ return int(n), err
+}
+
// FileReadWriteSeeker is a helper struct to pass a vfs.FileDescription as
// io.Reader/io.Writer/io.ReadSeeker/io.ReaderAt/io.WriterAt/etc.
type FileReadWriteSeeker struct {
diff --git a/pkg/sentry/fsimpl/verity/verity_test.go b/pkg/sentry/fsimpl/verity/verity_test.go
index 57bd65202..5c78a0019 100644
--- a/pkg/sentry/fsimpl/verity/verity_test.go
+++ b/pkg/sentry/fsimpl/verity/verity_test.go
@@ -89,10 +89,11 @@ func newVerityRoot(t *testing.T, hashAlg HashAlgorithm) (*vfs.VirtualFilesystem,
AllowUserMount: true,
})
+ data := "root_name=" + rootMerkleFilename
mntns, err := vfsObj.NewMountNamespace(ctx, auth.CredentialsFromContext(ctx), "", "verity", &vfs.MountOptions{
GetFilesystemOptions: vfs.GetFilesystemOptions{
+ Data: data,
InternalData: InternalFilesystemOptions{
- RootMerkleFileName: rootMerkleFilename,
LowerName: "tmpfs",
Alg: hashAlg,
AllowRuntimeEnable: true,
diff --git a/pkg/sentry/hostmm/BUILD b/pkg/sentry/hostmm/BUILD
index 300b7ccce..66fa1ad40 100644
--- a/pkg/sentry/hostmm/BUILD
+++ b/pkg/sentry/hostmm/BUILD
@@ -13,8 +13,8 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/fd",
+ "//pkg/hostarch",
"//pkg/log",
- "//pkg/usermem",
"@org_golang_x_sys//unix:go_default_library",
],
)
diff --git a/pkg/sentry/hostmm/hostmm.go b/pkg/sentry/hostmm/hostmm.go
index c47b96b54..285ea9050 100644
--- a/pkg/sentry/hostmm/hostmm.go
+++ b/pkg/sentry/hostmm/hostmm.go
@@ -23,8 +23,8 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/fd"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/log"
- "gvisor.dev/gvisor/pkg/usermem"
)
// NotifyCurrentMemcgPressureCallback requests that f is called whenever the
@@ -88,7 +88,7 @@ func NotifyCurrentMemcgPressureCallback(f func(), level string) (func(), error)
if n != sizeofUint64 {
panic(fmt.Sprintf("short read from memory pressure level eventfd: got %d bytes, wanted %d", n, sizeofUint64))
}
- val := usermem.ByteOrder.Uint64(buf[:])
+ val := hostarch.ByteOrder.Uint64(buf[:])
if val >= stopVal {
// Assume this was due to the notifier's "destructor" (the
// function returned by NotifyCurrentMemcgPressureCallback
@@ -103,7 +103,7 @@ func NotifyCurrentMemcgPressureCallback(f func(), level string) (func(), error)
return func() {
rw := fd.NewReadWriter(eventFD.FD())
var buf [sizeofUint64]byte
- usermem.ByteOrder.PutUint64(buf[:], stopVal)
+ hostarch.ByteOrder.PutUint64(buf[:], stopVal)
for {
n, err := rw.Write(buf[:])
if err != nil {
diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD
index c53e3e720..a1ec6daab 100644
--- a/pkg/sentry/kernel/BUILD
+++ b/pkg/sentry/kernel/BUILD
@@ -141,6 +141,7 @@ go_library(
srcs = [
"abstract_socket_namespace.go",
"aio.go",
+ "cgroup.go",
"context.go",
"fd_table.go",
"fd_table_refs.go",
@@ -178,6 +179,7 @@ go_library(
"task.go",
"task_acct.go",
"task_block.go",
+ "task_cgroup.go",
"task_clone.go",
"task_context.go",
"task_exec.go",
@@ -226,6 +228,7 @@ go_library(
"//pkg/eventchannel",
"//pkg/fspath",
"//pkg/goid",
+ "//pkg/hostarch",
"//pkg/log",
"//pkg/marshal",
"//pkg/marshal/primitive",
@@ -240,6 +243,7 @@ go_library(
"//pkg/sentry/fs/lock",
"//pkg/sentry/fs/timerfd",
"//pkg/sentry/fsbridge",
+ "//pkg/sentry/fsimpl/kernfs",
"//pkg/sentry/fsimpl/pipefs",
"//pkg/sentry/fsimpl/sockfs",
"//pkg/sentry/fsimpl/timerfd",
@@ -294,6 +298,7 @@ go_test(
deps = [
"//pkg/abi",
"//pkg/context",
+ "//pkg/hostarch",
"//pkg/sentry/arch",
"//pkg/sentry/contexttest",
"//pkg/sentry/fs",
@@ -305,6 +310,5 @@ go_test(
"//pkg/sentry/usage",
"//pkg/sync",
"//pkg/syserror",
- "//pkg/usermem",
],
)
diff --git a/pkg/sentry/kernel/cgroup.go b/pkg/sentry/kernel/cgroup.go
new file mode 100644
index 000000000..1f1c63f37
--- /dev/null
+++ b/pkg/sentry/kernel/cgroup.go
@@ -0,0 +1,281 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package kernel
+
+import (
+ "bytes"
+ "fmt"
+ "sort"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/sync"
+)
+
+// InvalidCgroupHierarchyID indicates an uninitialized hierarchy ID.
+const InvalidCgroupHierarchyID uint32 = 0
+
+// CgroupControllerType is the name of a cgroup controller.
+type CgroupControllerType string
+
+// CgroupController is the common interface to cgroup controllers available to
+// the entire sentry. The controllers themselves are defined by cgroupfs.
+//
+// Callers of this interface are often unable to access the synchronization needed
+// to ensure returned values remain valid. Some of the values returned from this
+// interface are thus snapshots in time, and may become stale. This is ok for
+// many callers like procfs.
+type CgroupController interface {
+ // Type returns the type of this cgroup controller (e.g. "memory", "cpu"). Returned
+ // value is valid for the lifetime of the controller.
+ Type() CgroupControllerType
+
+ // HierarchyID returns the ID of the hierarchy this cgroup controller is
+ // attached to. Returned value is valid for the lifetime of the controller.
+ HierarchyID() uint32
+
+ // Filesystem returns the filesystem this controller is attached to.
+ // Returned value is valid for the lifetime of the controller.
+ Filesystem() *vfs.Filesystem
+
+ // RootCgroup returns the root cgroup for this controller. Returned value is
+ // valid for the lifetime of the controller.
+ RootCgroup() Cgroup
+
+ // NumCgroups returns the number of cgroups managed by this controller.
+ // Returned value is a snapshot in time.
+ NumCgroups() uint64
+
+ // Enabled returns whether this controller is enabled. Returned value is a
+ // snapshot in time.
+ Enabled() bool
+}
+
+// Cgroup represents a named pointer to a cgroup in cgroupfs. When a task enters
+// a cgroup, it holds a reference on the underlying dentry pointing to the
+// cgroup.
+//
+// +stateify savable
+type Cgroup struct {
+ *kernfs.Dentry
+ CgroupImpl
+}
+
+func (c *Cgroup) decRef() {
+ c.Dentry.DecRef(context.Background())
+}
+
+// Path returns the absolute path of c, relative to its hierarchy root.
+func (c *Cgroup) Path() string {
+ return c.FSLocalPath()
+}
+
+// HierarchyID returns the id of the hierarchy that contains this cgroup.
+func (c *Cgroup) HierarchyID() uint32 {
+ // Note: a cgroup is guaranteed to have at least one controller.
+ return c.Controllers()[0].HierarchyID()
+}
+
+// CgroupImpl is the common interface to cgroups.
+type CgroupImpl interface {
+ Controllers() []CgroupController
+ Enter(t *Task)
+ Leave(t *Task)
+}
+
+// hierarchy represents a cgroupfs filesystem instance, with a unique set of
+// controllers attached to it. Multiple cgroupfs mounts may reference the same
+// hierarchy.
+//
+// +stateify savable
+type hierarchy struct {
+ id uint32
+ // These are a subset of the controllers in CgroupRegistry.controllers,
+ // grouped here by hierarchy for convenient lookup.
+ controllers map[CgroupControllerType]CgroupController
+ // fs is not owned by hierarchy. The FS is responsible for unregistering the
+ // hierarchy on destruction, which removes this association.
+ fs *vfs.Filesystem
+}
+
+func (h *hierarchy) match(ctypes []CgroupControllerType) bool {
+ if len(ctypes) != len(h.controllers) {
+ return false
+ }
+ for _, ty := range ctypes {
+ if _, ok := h.controllers[ty]; !ok {
+ return false
+ }
+ }
+ return true
+}
+
+// CgroupRegistry tracks the active set of cgroup controllers on the system.
+//
+// +stateify savable
+type CgroupRegistry struct {
+ // lastHierarchyID is the id of the last allocated cgroup hierarchy. Valid
+ // ids are from 1 to math.MaxUint32. Must be accessed through atomic ops.
+ //
+ lastHierarchyID uint32
+
+ mu sync.Mutex `state:"nosave"`
+
+ // controllers is the set of currently known cgroup controllers on the
+ // system. Protected by mu.
+ //
+ // +checklocks:mu
+ controllers map[CgroupControllerType]CgroupController
+
+ // hierarchies is the active set of cgroup hierarchies. Protected by mu.
+ //
+ // +checklocks:mu
+ hierarchies map[uint32]hierarchy
+}
+
+func newCgroupRegistry() *CgroupRegistry {
+ return &CgroupRegistry{
+ controllers: make(map[CgroupControllerType]CgroupController),
+ hierarchies: make(map[uint32]hierarchy),
+ }
+}
+
+// nextHierarchyID returns a newly allocated, unique hierarchy ID.
+func (r *CgroupRegistry) nextHierarchyID() (uint32, error) {
+ if hid := atomic.AddUint32(&r.lastHierarchyID, 1); hid != 0 {
+ return hid, nil
+ }
+ return InvalidCgroupHierarchyID, fmt.Errorf("cgroup hierarchy ID overflow")
+}
+
+// FindHierarchy returns a cgroup filesystem containing exactly the set of
+// controllers named in ctypes. If no such FS is found, FindHierarchy returns
+// nil. FindHierarchy takes a reference on the returned FS, which is transferred
+// to the caller.
+func (r *CgroupRegistry) FindHierarchy(ctypes []CgroupControllerType) *vfs.Filesystem {
+ r.mu.Lock()
+ defer r.mu.Unlock()
+
+ for _, h := range r.hierarchies {
+ if h.match(ctypes) {
+ h.fs.IncRef()
+ return h.fs
+ }
+ }
+
+ return nil
+}
+
+// Register registers the provided set of controllers with the registry as a new
+// hierarchy. If any controller is already registered, the function returns an
+// error without modifying the registry. The hierarchy can be later referenced
+// by the returned id.
+func (r *CgroupRegistry) Register(cs []CgroupController) (uint32, error) {
+ r.mu.Lock()
+ defer r.mu.Unlock()
+
+ if len(cs) == 0 {
+ return InvalidCgroupHierarchyID, fmt.Errorf("can't register hierarchy with no controllers")
+ }
+
+ for _, c := range cs {
+ if _, ok := r.controllers[c.Type()]; ok {
+ return InvalidCgroupHierarchyID, fmt.Errorf("controllers may only be mounted on a single hierarchy")
+ }
+ }
+
+ hid, err := r.nextHierarchyID()
+ if err != nil {
+ return hid, err
+ }
+
+ h := hierarchy{
+ id: hid,
+ controllers: make(map[CgroupControllerType]CgroupController),
+ fs: cs[0].Filesystem(),
+ }
+ for _, c := range cs {
+ n := c.Type()
+ r.controllers[n] = c
+ h.controllers[n] = c
+ }
+ r.hierarchies[hid] = h
+ return hid, nil
+}
+
+// Unregister removes a previously registered hierarchy from the registry. If
+// the hierarchy was not previously registered, Unregister is a no-op.
+func (r *CgroupRegistry) Unregister(hid uint32) {
+ r.mu.Lock()
+ defer r.mu.Unlock()
+
+ if h, ok := r.hierarchies[hid]; ok {
+ for name := range h.controllers { // Drop the hierarchy's controllers from the global set.
+ delete(r.controllers, name)
+ }
+ delete(r.hierarchies, hid)
+ }
+}
+
+// computeInitialGroups takes a reference on each of the returned cgroups. The
+// caller takes ownership of this returned reference.
+func (r *CgroupRegistry) computeInitialGroups(inherit map[Cgroup]struct{}) map[Cgroup]struct{} {
+ r.mu.Lock()
+ defer r.mu.Unlock()
+ ctlSet := make(map[CgroupControllerType]CgroupController)
+ cgset := make(map[Cgroup]struct{})
+ // Remember controllers from the inherited cgroups set...
+ for cg := range inherit {
+ cg.IncRef() // Ref transferred to caller.
+ cgset[cg] = struct{}{} // Recorded alongside the IncRef so the ref is always returned.
+ for _, ctl := range cg.Controllers() {
+ ctlSet[ctl.Type()] = ctl
+ }
+ }
+
+ // ... and add the root cgroups of all the missing controllers.
+ for name, ctl := range r.controllers {
+ if _, ok := ctlSet[name]; !ok {
+ cg := ctl.RootCgroup()
+ if _, dup := cgset[cg]; !dup { // Controllers on one hierarchy may share a root: one ref per root.
+ cg.IncRef() // Ref transferred to caller.
+ cgset[cg] = struct{}{}
+ }
+ }
+ }
+ return cgset
+}
+
+// GenerateProcCgroups writes the contents of /proc/cgroups to buf.
+func (r *CgroupRegistry) GenerateProcCgroups(buf *bytes.Buffer) {
+ r.mu.Lock()
+ entries := make([]string, 0, len(r.controllers))
+ for _, c := range r.controllers {
+ en := 0
+ if c.Enabled() {
+ en = 1
+ }
+ entries = append(entries, fmt.Sprintf("%s\t%d\t%d\t%d\n", c.Type(), c.HierarchyID(), c.NumCgroups(), en))
+ }
+ r.mu.Unlock()
+
+ sort.Strings(entries)
+ fmt.Fprint(buf, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n")
+ for _, e := range entries {
+ fmt.Fprint(buf, e)
+ }
+}
diff --git a/pkg/sentry/kernel/eventfd/BUILD b/pkg/sentry/kernel/eventfd/BUILD
index 7ecbd29ab..564c3d42e 100644
--- a/pkg/sentry/kernel/eventfd/BUILD
+++ b/pkg/sentry/kernel/eventfd/BUILD
@@ -10,6 +10,7 @@ go_library(
"//pkg/abi/linux",
"//pkg/context",
"//pkg/fdnotifier",
+ "//pkg/hostarch",
"//pkg/sentry/fs",
"//pkg/sentry/fs/anon",
"//pkg/sentry/fs/fsutil",
diff --git a/pkg/sentry/kernel/eventfd/eventfd.go b/pkg/sentry/kernel/eventfd/eventfd.go
index 2aca02fd5..4466fbc9d 100644
--- a/pkg/sentry/kernel/eventfd/eventfd.go
+++ b/pkg/sentry/kernel/eventfd/eventfd.go
@@ -23,6 +23,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fdnotifier"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/fs/anon"
"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
@@ -186,7 +187,7 @@ func (e *EventOperations) read(ctx context.Context, dst usermem.IOSequence) erro
e.wq.Notify(waiter.WritableEvents)
var buf [8]byte
- usermem.ByteOrder.PutUint64(buf[:], val)
+ hostarch.ByteOrder.PutUint64(buf[:], val)
_, err := dst.CopyOut(ctx, buf[:])
return err
}
@@ -194,7 +195,7 @@ func (e *EventOperations) read(ctx context.Context, dst usermem.IOSequence) erro
// Must be called with e.mu locked.
func (e *EventOperations) hostWrite(val uint64) error {
var buf [8]byte
- usermem.ByteOrder.PutUint64(buf[:], val)
+ hostarch.ByteOrder.PutUint64(buf[:], val)
_, err := unix.Write(e.hostfd, buf[:])
if err == unix.EWOULDBLOCK {
return syserror.ErrWouldBlock
@@ -207,7 +208,7 @@ func (e *EventOperations) write(ctx context.Context, src usermem.IOSequence) err
if _, err := src.CopyIn(ctx, buf[:]); err != nil {
return err
}
- val := usermem.ByteOrder.Uint64(buf[:])
+ val := hostarch.ByteOrder.Uint64(buf[:])
return e.Signal(val)
}
diff --git a/pkg/sentry/kernel/futex/BUILD b/pkg/sentry/kernel/futex/BUILD
index 041e3d4ca..a75686cf3 100644
--- a/pkg/sentry/kernel/futex/BUILD
+++ b/pkg/sentry/kernel/futex/BUILD
@@ -37,6 +37,7 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/hostarch",
"//pkg/log",
"//pkg/sentry/memmap",
"//pkg/sync",
@@ -52,8 +53,8 @@ go_test(
library = ":futex",
deps = [
"//pkg/context",
+ "//pkg/hostarch",
"//pkg/sync",
- "//pkg/usermem",
"@org_golang_x_sys//unix:go_default_library",
],
)
diff --git a/pkg/sentry/kernel/futex/futex.go b/pkg/sentry/kernel/futex/futex.go
index e4dcc4d40..0427cf3f4 100644
--- a/pkg/sentry/kernel/futex/futex.go
+++ b/pkg/sentry/kernel/futex/futex.go
@@ -20,10 +20,10 @@ package futex
import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
// KeyKind indicates the type of a Key.
@@ -83,8 +83,8 @@ func (k *Key) clone() Key {
}
// Preconditions: k.Kind == KindPrivate or KindSharedPrivate.
-func (k *Key) addr() usermem.Addr {
- return usermem.Addr(k.Offset)
+func (k *Key) addr() hostarch.Addr {
+ return hostarch.Addr(k.Offset)
}
// matches returns true if a wakeup on k2 should wake a waiter waiting on k.
@@ -97,14 +97,14 @@ func (k *Key) matches(k2 *Key) bool {
type Target interface {
context.Context
- // SwapUint32 gives access to usermem.IO.SwapUint32.
- SwapUint32(addr usermem.Addr, new uint32) (uint32, error)
+ // SwapUint32 gives access to usermem.IO.SwapUint32.
+ SwapUint32(addr hostarch.Addr, new uint32) (uint32, error)
- // CompareAndSwap gives access to usermem.IO.CompareAndSwapUint32.
- CompareAndSwapUint32(addr usermem.Addr, old, new uint32) (uint32, error)
+ // CompareAndSwapUint32 gives access to usermem.IO.CompareAndSwapUint32.
+ CompareAndSwapUint32(addr hostarch.Addr, old, new uint32) (uint32, error)
- // LoadUint32 gives access to usermem.IO.LoadUint32.
- LoadUint32(addr usermem.Addr) (uint32, error)
+ // LoadUint32 gives access to usermem.IO.LoadUint32.
+ LoadUint32(addr hostarch.Addr) (uint32, error)
// GetSharedKey returns a Key with kind KindSharedPrivate or
// KindSharedMappable corresponding to the memory mapped at address addr.
@@ -112,11 +112,11 @@ type Target interface {
// If GetSharedKey returns a Key with a non-nil MappingIdentity, a
// reference is held on the MappingIdentity, which must be dropped by the
// caller when the Key is no longer in use.
- GetSharedKey(addr usermem.Addr) (Key, error)
+ GetSharedKey(addr hostarch.Addr) (Key, error)
}
// check performs a basic equality check on the given address.
-func check(t Target, addr usermem.Addr, val uint32) error {
+func check(t Target, addr hostarch.Addr, val uint32) error {
cur, err := t.LoadUint32(addr)
if err != nil {
return err
@@ -128,7 +128,7 @@ func check(t Target, addr usermem.Addr, val uint32) error {
}
// atomicOp performs a complex operation on the given address.
-func atomicOp(t Target, addr usermem.Addr, opIn uint32) (bool, error) {
+func atomicOp(t Target, addr hostarch.Addr, opIn uint32) (bool, error) {
opType := (opIn >> 28) & 0xf
cmp := (opIn >> 24) & 0xf
opArg := (opIn >> 12) & 0xfff
@@ -328,7 +328,7 @@ const (
)
// getKey returns a Key representing address addr in c.
-func getKey(t Target, addr usermem.Addr, private bool) (Key, error) {
+func getKey(t Target, addr hostarch.Addr, private bool) (Key, error) {
// Ensure the address is aligned.
// It must be a DWORD boundary.
if addr&0x3 != 0 {
@@ -341,7 +341,7 @@ func getKey(t Target, addr usermem.Addr, private bool) (Key, error) {
}
// bucketIndexForAddr returns the index into Manager.buckets for addr.
-func bucketIndexForAddr(addr usermem.Addr) uintptr {
+func bucketIndexForAddr(addr hostarch.Addr) uintptr {
// - The bottom 2 bits of addr must be 0, per getKey.
//
// - On amd64, the top 16 bits of addr (bits 48-63) must be equal to bit 47
@@ -448,7 +448,7 @@ func (m *Manager) lockBuckets(k1, k2 *Key) (*bucket, *bucket) {
// Wake wakes up to n waiters matching the bitmask on the given addr.
// The number of waiters woken is returned.
-func (m *Manager) Wake(t Target, addr usermem.Addr, private bool, bitmask uint32, n int) (int, error) {
+func (m *Manager) Wake(t Target, addr hostarch.Addr, private bool, bitmask uint32, n int) (int, error) {
// This function is very hot; avoid defer.
k, err := getKey(t, addr, private)
if err != nil {
@@ -463,7 +463,7 @@ func (m *Manager) Wake(t Target, addr usermem.Addr, private bool, bitmask uint32
return r, nil
}
-func (m *Manager) doRequeue(t Target, addr, naddr usermem.Addr, private bool, checkval bool, val uint32, nwake int, nreq int) (int, error) {
+func (m *Manager) doRequeue(t Target, addr, naddr hostarch.Addr, private bool, checkval bool, val uint32, nwake int, nreq int) (int, error) {
k1, err := getKey(t, addr, private)
if err != nil {
return 0, err
@@ -498,14 +498,14 @@ func (m *Manager) doRequeue(t Target, addr, naddr usermem.Addr, private bool, ch
// Requeue wakes up to nwake waiters on the given addr, and unconditionally
// requeues up to nreq waiters on naddr.
-func (m *Manager) Requeue(t Target, addr, naddr usermem.Addr, private bool, nwake int, nreq int) (int, error) {
+func (m *Manager) Requeue(t Target, addr, naddr hostarch.Addr, private bool, nwake int, nreq int) (int, error) {
return m.doRequeue(t, addr, naddr, private, false, 0, nwake, nreq)
}
// RequeueCmp atomically checks that the addr contains val (via the Target),
// wakes up to nwake waiters on addr and then unconditionally requeues nreq
// waiters on naddr.
-func (m *Manager) RequeueCmp(t Target, addr, naddr usermem.Addr, private bool, val uint32, nwake int, nreq int) (int, error) {
+func (m *Manager) RequeueCmp(t Target, addr, naddr hostarch.Addr, private bool, val uint32, nwake int, nreq int) (int, error) {
return m.doRequeue(t, addr, naddr, private, true, val, nwake, nreq)
}
@@ -513,7 +513,7 @@ func (m *Manager) RequeueCmp(t Target, addr, naddr usermem.Addr, private bool, v
// waiters unconditionally from addr1, and, based on the original value at addr2
// and a comparison encoded in op, wakes up to nwake2 waiters from addr2.
// It returns the total number of waiters woken.
-func (m *Manager) WakeOp(t Target, addr1, addr2 usermem.Addr, private bool, nwake1 int, nwake2 int, op uint32) (int, error) {
+func (m *Manager) WakeOp(t Target, addr1, addr2 hostarch.Addr, private bool, nwake1 int, nwake2 int, op uint32) (int, error) {
k1, err := getKey(t, addr1, private)
if err != nil {
return 0, err
@@ -553,7 +553,7 @@ func (m *Manager) WakeOp(t Target, addr1, addr2 usermem.Addr, private bool, nwak
// enqueues w to be woken by a send to w.C. If WaitPrepare returns nil, the
// Waiter must be subsequently removed by calling WaitComplete, whether or not
// a wakeup is received on w.C.
-func (m *Manager) WaitPrepare(w *Waiter, t Target, addr usermem.Addr, private bool, val uint32, bitmask uint32) error {
+func (m *Manager) WaitPrepare(w *Waiter, t Target, addr hostarch.Addr, private bool, val uint32, bitmask uint32) error {
k, err := getKey(t, addr, private)
if err != nil {
return err
@@ -631,7 +631,7 @@ func (m *Manager) WaitComplete(w *Waiter, t Target) {
// FUTEX_OWNER_DIED is only set by the Linux when robust lists are in use (see
// exit_robust_list()). Given we don't support robust lists, although handled
// below, it's never set.
-func (m *Manager) LockPI(w *Waiter, t Target, addr usermem.Addr, tid uint32, private, try bool) (bool, error) {
+func (m *Manager) LockPI(w *Waiter, t Target, addr hostarch.Addr, tid uint32, private, try bool) (bool, error) {
k, err := getKey(t, addr, private)
if err != nil {
return false, err
@@ -663,7 +663,7 @@ func (m *Manager) LockPI(w *Waiter, t Target, addr usermem.Addr, tid uint32, pri
return success, nil
}
-func (m *Manager) lockPILocked(w *Waiter, t Target, addr usermem.Addr, tid uint32, b *bucket, try bool) (bool, error) {
+func (m *Manager) lockPILocked(w *Waiter, t Target, addr hostarch.Addr, tid uint32, b *bucket, try bool) (bool, error) {
for {
cur, err := t.LoadUint32(addr)
if err != nil {
@@ -724,7 +724,7 @@ func (m *Manager) lockPILocked(w *Waiter, t Target, addr usermem.Addr, tid uint3
// The address provided must contain the caller's TID. If there are waiters,
// TID of the next waiter (FIFO) is set to the given address, and the waiter
// woken up. If there are no waiters, 0 is set to the address.
-func (m *Manager) UnlockPI(t Target, addr usermem.Addr, tid uint32, private bool) error {
+func (m *Manager) UnlockPI(t Target, addr hostarch.Addr, tid uint32, private bool) error {
k, err := getKey(t, addr, private)
if err != nil {
return err
@@ -738,7 +738,7 @@ func (m *Manager) UnlockPI(t Target, addr usermem.Addr, tid uint32, private bool
return err
}
-func (m *Manager) unlockPILocked(t Target, addr usermem.Addr, tid uint32, b *bucket, key *Key) error {
+func (m *Manager) unlockPILocked(t Target, addr hostarch.Addr, tid uint32, b *bucket, key *Key) error {
cur, err := t.LoadUint32(addr)
if err != nil {
return err
diff --git a/pkg/sentry/kernel/futex/futex_test.go b/pkg/sentry/kernel/futex/futex_test.go
index ba7f95d8a..deba44e5c 100644
--- a/pkg/sentry/kernel/futex/futex_test.go
+++ b/pkg/sentry/kernel/futex/futex_test.go
@@ -23,8 +23,8 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/usermem"
)
// testData implements the Target interface, and allows us to
@@ -43,23 +43,23 @@ func newTestData(size uint) testData {
}
}
-func (t testData) SwapUint32(addr usermem.Addr, new uint32) (uint32, error) {
+func (t testData) SwapUint32(addr hostarch.Addr, new uint32) (uint32, error) {
val := atomic.SwapUint32((*uint32)(unsafe.Pointer(&t.data[addr])), new)
return val, nil
}
-func (t testData) CompareAndSwapUint32(addr usermem.Addr, old, new uint32) (uint32, error) {
+func (t testData) CompareAndSwapUint32(addr hostarch.Addr, old, new uint32) (uint32, error) {
if atomic.CompareAndSwapUint32((*uint32)(unsafe.Pointer(&t.data[addr])), old, new) {
return old, nil
}
return atomic.LoadUint32((*uint32)(unsafe.Pointer(&t.data[addr]))), nil
}
-func (t testData) LoadUint32(addr usermem.Addr) (uint32, error) {
+func (t testData) LoadUint32(addr hostarch.Addr) (uint32, error) {
return atomic.LoadUint32((*uint32)(unsafe.Pointer(&t.data[addr]))), nil
}
-func (t testData) GetSharedKey(addr usermem.Addr) (Key, error) {
+func (t testData) GetSharedKey(addr hostarch.Addr) (Key, error) {
return Key{
Kind: KindSharedMappable,
Offset: uint64(addr),
@@ -73,7 +73,7 @@ func futexKind(private bool) string {
return "shared"
}
-func newPreparedTestWaiter(t *testing.T, m *Manager, ta Target, addr usermem.Addr, private bool, val uint32, bitmask uint32) *Waiter {
+func newPreparedTestWaiter(t *testing.T, m *Manager, ta Target, addr hostarch.Addr, private bool, val uint32, bitmask uint32) *Waiter {
w := NewWaiter()
if err := m.WaitPrepare(w, ta, addr, private, val, bitmask); err != nil {
t.Fatalf("WaitPrepare failed: %v", err)
@@ -463,12 +463,12 @@ const (
// Beyond being used as a Locker, this is a simple mechanism for
// changing the underlying values for simpler tests.
type testMutex struct {
- a usermem.Addr
+ a hostarch.Addr
d testData
m *Manager
}
-func newTestMutex(addr usermem.Addr, d testData, m *Manager) *testMutex {
+func newTestMutex(addr hostarch.Addr, d testData, m *Manager) *testMutex {
return &testMutex{a: addr, d: d, m: m}
}
diff --git a/pkg/sentry/kernel/kcov.go b/pkg/sentry/kernel/kcov.go
index 4fcdfc541..4b943106b 100644
--- a/pkg/sentry/kernel/kcov.go
+++ b/pkg/sentry/kernel/kcov.go
@@ -22,13 +22,13 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/coverage"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/mm"
"gvisor.dev/gvisor/pkg/sentry/pgalloc"
"gvisor.dev/gvisor/pkg/sentry/usage"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
// kcovAreaSizeMax is the maximum number of uint64 entries allowed in the kcov
@@ -130,7 +130,7 @@ func (kcov *Kcov) InitTrace(size uint64) error {
// To simplify all the logic around mapping, we require that the length of the
// shared region is a multiple of the system page size.
- if (8*size)&(usermem.PageSize-1) != 0 {
+ if (8*size)&(hostarch.PageSize-1) != 0 {
return syserror.EINVAL
}
@@ -286,7 +286,7 @@ func (rw *kcovReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) {
}
// Get internal mappings.
- bs, err := rw.mf.MapInternal(memmap.FileRange{start, end}, usermem.Read)
+ bs, err := rw.mf.MapInternal(memmap.FileRange{start, end}, hostarch.Read)
if err != nil {
return 0, err
}
@@ -314,7 +314,7 @@ func (rw *kcovReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error)
}
// Get internal mapping.
- bs, err := rw.mf.MapInternal(memmap.FileRange{start, end}, usermem.Write)
+ bs, err := rw.mf.MapInternal(memmap.FileRange{start, end}, hostarch.Write)
if err != nil {
return 0, err
}
diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go
index 43065b45a..9a4fd64cb 100644
--- a/pkg/sentry/kernel/kernel.go
+++ b/pkg/sentry/kernel/kernel.go
@@ -294,6 +294,11 @@ type Kernel struct {
// YAMAPtraceScope is the current level of YAMA ptrace restrictions.
YAMAPtraceScope int32
+
+ // cgroupRegistry contains the set of active cgroup controllers on the
+ // system. It is controlled by cgroupfs. Nil if cgroupfs is unavailable on
+ // the system.
+ cgroupRegistry *CgroupRegistry
}
// InitKernelArgs holds arguments to Init.
@@ -438,6 +443,8 @@ func (k *Kernel) Init(args InitKernelArgs) error {
k.socketMount = socketMount
k.socketsVFS2 = make(map[*vfs.FileDescription]*SocketRecord)
+
+ k.cgroupRegistry = newCgroupRegistry()
}
return nil
}
@@ -1815,6 +1822,11 @@ func (k *Kernel) SocketMount() *vfs.Mount {
return k.socketMount
}
+// CgroupRegistry returns the cgroup registry.
+func (k *Kernel) CgroupRegistry() *CgroupRegistry {
+ return k.cgroupRegistry
+}
+
// Release releases resources owned by k.
//
// Precondition: This should only be called after the kernel is fully
@@ -1831,3 +1843,43 @@ func (k *Kernel) Release() {
k.timekeeper.Destroy()
k.vdso.Release(ctx)
}
+
+// PopulateNewCgroupHierarchy moves all tasks into a newly created cgroup
+// hierarchy.
+//
+// Precondition: root must be a new cgroup with no tasks. This implies the
+// controllers for root are also new and currently manage no task, which in turn
+// implies the new cgroup can be populated without migrating tasks between
+// cgroups.
+func (k *Kernel) PopulateNewCgroupHierarchy(root Cgroup) {
+ k.tasks.mu.RLock()
+ k.tasks.forEachTaskLocked(func(t *Task) {
+ if t.ExitState() != TaskExitNone {
+ return
+ }
+ t.mu.Lock()
+ t.enterCgroupLocked(root)
+ t.mu.Unlock()
+ })
+ k.tasks.mu.RUnlock()
+}
+
+// ReleaseCgroupHierarchy moves all tasks out of all cgroups belonging to the
+// hierarchy with the provided id. This is intended for use during hierarchy
+// teardown, as otherwise the tasks would be orphaned w.r.t. some controllers.
+func (k *Kernel) ReleaseCgroupHierarchy(hid uint32) {
+ k.tasks.mu.RLock()
+ k.tasks.forEachTaskLocked(func(t *Task) {
+ if t.ExitState() != TaskExitNone {
+ return
+ }
+ t.mu.Lock()
+ for cg, _ := range t.cgroups {
+ if cg.HierarchyID() == hid {
+ t.leaveCgroupLocked(cg)
+ }
+ }
+ t.mu.Unlock()
+ })
+ k.tasks.mu.RUnlock()
+}
diff --git a/pkg/sentry/kernel/pipe/BUILD b/pkg/sentry/kernel/pipe/BUILD
index beba6d97d..34c617b08 100644
--- a/pkg/sentry/kernel/pipe/BUILD
+++ b/pkg/sentry/kernel/pipe/BUILD
@@ -21,6 +21,7 @@ go_library(
"//pkg/abi/linux",
"//pkg/amutex",
"//pkg/context",
+ "//pkg/hostarch",
"//pkg/marshal/primitive",
"//pkg/safemem",
"//pkg/sentry/arch",
diff --git a/pkg/sentry/kernel/pipe/pipe.go b/pkg/sentry/kernel/pipe/pipe.go
index d004f2357..06769931a 100644
--- a/pkg/sentry/kernel/pipe/pipe.go
+++ b/pkg/sentry/kernel/pipe/pipe.go
@@ -22,18 +22,18 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
const (
// MinimumPipeSize is a hard limit of the minimum size of a pipe.
// It corresponds to fs/pipe.c:pipe_min_size.
- MinimumPipeSize = usermem.PageSize
+ MinimumPipeSize = hostarch.PageSize
// MaximumPipeSize is a hard limit on the maximum size of a pipe.
// It corresponds to fs/pipe.c:pipe_max_size.
@@ -41,7 +41,7 @@ const (
// DefaultPipeSize is the system-wide default size of a pipe in bytes.
// It corresponds to pipe_fs_i.h:PIPE_DEF_BUFFERS.
- DefaultPipeSize = 16 * usermem.PageSize
+ DefaultPipeSize = 16 * hostarch.PageSize
// atomicIOBytes is the maximum number of bytes that the pipe will
// guarantee atomic reads or writes atomically.
diff --git a/pkg/sentry/kernel/pipe/vfs.go b/pkg/sentry/kernel/pipe/vfs.go
index e524afad5..95b948edb 100644
--- a/pkg/sentry/kernel/pipe/vfs.go
+++ b/pkg/sentry/kernel/pipe/vfs.go
@@ -17,6 +17,7 @@ package pipe
import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/vfs"
@@ -274,7 +275,7 @@ func (fd *VFSPipeFD) SpliceToNonPipe(ctx context.Context, out *vfs.FileDescripti
}
src := usermem.IOSequence{
IO: fd,
- Addrs: usermem.AddrRangeSeqOf(usermem.AddrRange{0, usermem.Addr(count)}),
+ Addrs: hostarch.AddrRangeSeqOf(hostarch.AddrRange{0, hostarch.Addr(count)}),
}
var (
@@ -302,7 +303,7 @@ func (fd *VFSPipeFD) SpliceToNonPipe(ctx context.Context, out *vfs.FileDescripti
func (fd *VFSPipeFD) SpliceFromNonPipe(ctx context.Context, in *vfs.FileDescription, off, count int64) (int64, error) {
dst := usermem.IOSequence{
IO: fd,
- Addrs: usermem.AddrRangeSeqOf(usermem.AddrRange{0, usermem.Addr(count)}),
+ Addrs: hostarch.AddrRangeSeqOf(hostarch.AddrRange{0, hostarch.Addr(count)}),
}
var (
@@ -328,7 +329,7 @@ func (fd *VFSPipeFD) SpliceFromNonPipe(ctx context.Context, in *vfs.FileDescript
// fd.pipe.Notify(waiter.WritableEvents) after the read is completed.
//
// Preconditions: fd.pipe.mu must be locked.
-func (fd *VFSPipeFD) CopyIn(ctx context.Context, addr usermem.Addr, dst []byte, opts usermem.IOOpts) (int, error) {
+func (fd *VFSPipeFD) CopyIn(ctx context.Context, addr hostarch.Addr, dst []byte, opts usermem.IOOpts) (int, error) {
n, err := fd.pipe.peekLocked(int64(len(dst)), func(srcs safemem.BlockSeq) (uint64, error) {
return safemem.CopySeq(safemem.BlockSeqOf(safemem.BlockFromSafeSlice(dst)), srcs)
})
@@ -340,7 +341,7 @@ func (fd *VFSPipeFD) CopyIn(ctx context.Context, addr usermem.Addr, dst []byte,
// is completed.
//
// Preconditions: fd.pipe.mu must be locked.
-func (fd *VFSPipeFD) CopyOut(ctx context.Context, addr usermem.Addr, src []byte, opts usermem.IOOpts) (int, error) {
+func (fd *VFSPipeFD) CopyOut(ctx context.Context, addr hostarch.Addr, src []byte, opts usermem.IOOpts) (int, error) {
n, err := fd.pipe.writeLocked(int64(len(src)), func(dsts safemem.BlockSeq) (uint64, error) {
return safemem.CopySeq(dsts, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(src)))
})
@@ -350,7 +351,7 @@ func (fd *VFSPipeFD) CopyOut(ctx context.Context, addr usermem.Addr, src []byte,
// ZeroOut implements usermem.IO.ZeroOut.
//
// Preconditions: fd.pipe.mu must be locked.
-func (fd *VFSPipeFD) ZeroOut(ctx context.Context, addr usermem.Addr, toZero int64, opts usermem.IOOpts) (int64, error) {
+func (fd *VFSPipeFD) ZeroOut(ctx context.Context, addr hostarch.Addr, toZero int64, opts usermem.IOOpts) (int64, error) {
n, err := fd.pipe.writeLocked(toZero, func(dsts safemem.BlockSeq) (uint64, error) {
return safemem.ZeroSeq(dsts)
})
@@ -362,7 +363,7 @@ func (fd *VFSPipeFD) ZeroOut(ctx context.Context, addr usermem.Addr, toZero int6
// fd.pipe.Notify(waiter.WritableEvents) after the read is completed.
//
// Preconditions: fd.pipe.mu must be locked.
-func (fd *VFSPipeFD) CopyInTo(ctx context.Context, ars usermem.AddrRangeSeq, dst safemem.Writer, opts usermem.IOOpts) (int64, error) {
+func (fd *VFSPipeFD) CopyInTo(ctx context.Context, ars hostarch.AddrRangeSeq, dst safemem.Writer, opts usermem.IOOpts) (int64, error) {
return fd.pipe.peekLocked(ars.NumBytes(), func(srcs safemem.BlockSeq) (uint64, error) {
return dst.WriteFromBlocks(srcs)
})
@@ -373,25 +374,25 @@ func (fd *VFSPipeFD) CopyInTo(ctx context.Context, ars usermem.AddrRangeSeq, dst
// is completed.
//
// Preconditions: fd.pipe.mu must be locked.
-func (fd *VFSPipeFD) CopyOutFrom(ctx context.Context, ars usermem.AddrRangeSeq, src safemem.Reader, opts usermem.IOOpts) (int64, error) {
+func (fd *VFSPipeFD) CopyOutFrom(ctx context.Context, ars hostarch.AddrRangeSeq, src safemem.Reader, opts usermem.IOOpts) (int64, error) {
return fd.pipe.writeLocked(ars.NumBytes(), func(dsts safemem.BlockSeq) (uint64, error) {
return src.ReadToBlocks(dsts)
})
}
// SwapUint32 implements usermem.IO.SwapUint32.
-func (fd *VFSPipeFD) SwapUint32(ctx context.Context, addr usermem.Addr, new uint32, opts usermem.IOOpts) (uint32, error) {
+func (fd *VFSPipeFD) SwapUint32(ctx context.Context, addr hostarch.Addr, new uint32, opts usermem.IOOpts) (uint32, error) {
// How did a pipe get passed as the virtual address space to futex(2)?
panic("VFSPipeFD.SwapUint32 called unexpectedly")
}
// CompareAndSwapUint32 implements usermem.IO.CompareAndSwapUint32.
-func (fd *VFSPipeFD) CompareAndSwapUint32(ctx context.Context, addr usermem.Addr, old, new uint32, opts usermem.IOOpts) (uint32, error) {
+func (fd *VFSPipeFD) CompareAndSwapUint32(ctx context.Context, addr hostarch.Addr, old, new uint32, opts usermem.IOOpts) (uint32, error) {
panic("VFSPipeFD.CompareAndSwapUint32 called unexpectedly")
}
// LoadUint32 implements usermem.IO.LoadUint32.
-func (fd *VFSPipeFD) LoadUint32(ctx context.Context, addr usermem.Addr, opts usermem.IOOpts) (uint32, error) {
+func (fd *VFSPipeFD) LoadUint32(ctx context.Context, addr hostarch.Addr, opts usermem.IOOpts) (uint32, error) {
panic("VFSPipeFD.LoadUint32 called unexpectedly")
}
diff --git a/pkg/sentry/kernel/ptrace.go b/pkg/sentry/kernel/ptrace.go
index f5a60e749..57c7659e7 100644
--- a/pkg/sentry/kernel/ptrace.go
+++ b/pkg/sentry/kernel/ptrace.go
@@ -19,6 +19,7 @@ import (
"sync/atomic"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/marshal/primitive"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/mm"
@@ -1011,7 +1012,7 @@ func (t *Task) ptraceSetOptionsLocked(opts uintptr) error {
}
// Ptrace implements the ptrace system call.
-func (t *Task) Ptrace(req int64, pid ThreadID, addr, data usermem.Addr) error {
+func (t *Task) Ptrace(req int64, pid ThreadID, addr, data hostarch.Addr) error {
// PTRACE_TRACEME ignores all other arguments.
if req == linux.PTRACE_TRACEME {
return t.ptraceTraceme()
@@ -1190,7 +1191,7 @@ func (t *Task) Ptrace(req int64, pid ThreadID, addr, data usermem.Addr) error {
panic(fmt.Sprintf("%#x + %#x overflows. Invalid reg size > %#x", ar.Start, n, ar.Length()))
}
ar.End = end
- return t.CopyOutIovecs(data, usermem.AddrRangeSeqOf(ar))
+ return t.CopyOutIovecs(data, hostarch.AddrRangeSeqOf(ar))
case linux.PTRACE_SETREGSET:
ars, err := t.CopyInIovecs(data, 1)
@@ -1214,8 +1215,8 @@ func (t *Task) Ptrace(req int64, pid ThreadID, addr, data usermem.Addr) error {
return err
}
t.p.FullStateChanged()
- ar.End -= usermem.Addr(n)
- return t.CopyOutIovecs(data, usermem.AddrRangeSeqOf(ar))
+ ar.End -= hostarch.Addr(n)
+ return t.CopyOutIovecs(data, hostarch.AddrRangeSeqOf(ar))
case linux.PTRACE_GETSIGINFO:
t.tg.pidns.owner.mu.RLock()
@@ -1267,7 +1268,7 @@ func (t *Task) Ptrace(req int64, pid ThreadID, addr, data usermem.Addr) error {
case linux.PTRACE_GETEVENTMSG:
t.tg.pidns.owner.mu.RLock()
defer t.tg.pidns.owner.mu.RUnlock()
- _, err := primitive.CopyUint64Out(t, usermem.Addr(data), target.ptraceEventMsg)
+ _, err := primitive.CopyUint64Out(t, hostarch.Addr(data), target.ptraceEventMsg)
return err
// PEEKSIGINFO is unimplemented but seems to have no users anywhere.
diff --git a/pkg/sentry/kernel/ptrace_amd64.go b/pkg/sentry/kernel/ptrace_amd64.go
index 7aea3dcd8..5ae05b5c3 100644
--- a/pkg/sentry/kernel/ptrace_amd64.go
+++ b/pkg/sentry/kernel/ptrace_amd64.go
@@ -18,12 +18,13 @@ package kernel
import (
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
// ptraceArch implements arch-specific ptrace commands.
-func (t *Task) ptraceArch(target *Task, req int64, addr, data usermem.Addr) error {
+func (t *Task) ptraceArch(target *Task, req int64, addr, data hostarch.Addr) error {
switch req {
case linux.PTRACE_PEEKUSR: // aka PTRACE_PEEKUSER
n, err := target.Arch().PtracePeekUser(uintptr(addr))
diff --git a/pkg/sentry/kernel/ptrace_arm64.go b/pkg/sentry/kernel/ptrace_arm64.go
index d971b96b3..46dd84cbc 100644
--- a/pkg/sentry/kernel/ptrace_arm64.go
+++ b/pkg/sentry/kernel/ptrace_arm64.go
@@ -17,11 +17,11 @@
package kernel
import (
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
// ptraceArch implements arch-specific ptrace commands.
-func (t *Task) ptraceArch(target *Task, req int64, addr, data usermem.Addr) error {
+func (t *Task) ptraceArch(target *Task, req int64, addr, data hostarch.Addr) error {
return syserror.EIO
}
diff --git a/pkg/sentry/kernel/rseq.go b/pkg/sentry/kernel/rseq.go
index 2a9023fdf..4bc5bca44 100644
--- a/pkg/sentry/kernel/rseq.go
+++ b/pkg/sentry/kernel/rseq.go
@@ -18,6 +18,7 @@ import (
"fmt"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/hostcpu"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
@@ -43,8 +44,8 @@ type OldRSeqCriticalRegion struct {
// application handler while its instruction pointer is in CriticalSection,
// set the instruction pointer to Restart and application register r10 (on
// amd64) to the former instruction pointer.
- CriticalSection usermem.AddrRange
- Restart usermem.Addr
+ CriticalSection hostarch.AddrRange
+ Restart hostarch.Addr
}
// RSeqAvailable returns true if t supports (old and new) restartable sequences.
@@ -55,7 +56,7 @@ func (t *Task) RSeqAvailable() bool {
// SetRSeq registers addr as this thread's rseq structure.
//
// Preconditions: The caller must be running on the task goroutine.
-func (t *Task) SetRSeq(addr usermem.Addr, length, signature uint32) error {
+func (t *Task) SetRSeq(addr hostarch.Addr, length, signature uint32) error {
if t.rseqAddr != 0 {
if t.rseqAddr != addr {
return syserror.EINVAL
@@ -100,7 +101,7 @@ func (t *Task) SetRSeq(addr usermem.Addr, length, signature uint32) error {
// ClearRSeq unregisters addr as this thread's rseq structure.
//
// Preconditions: The caller must be running on the task goroutine.
-func (t *Task) ClearRSeq(addr usermem.Addr, length, signature uint32) error {
+func (t *Task) ClearRSeq(addr hostarch.Addr, length, signature uint32) error {
if t.rseqAddr == 0 {
return syserror.EINVAL
}
@@ -166,7 +167,7 @@ func (t *Task) SetOldRSeqCriticalRegion(r OldRSeqCriticalRegion) error {
// CPU number.
//
// Preconditions: The caller must be running on the task goroutine.
-func (t *Task) OldRSeqCPUAddr() usermem.Addr {
+func (t *Task) OldRSeqCPUAddr() hostarch.Addr {
return t.oldRSeqCPUAddr
}
@@ -177,7 +178,7 @@ func (t *Task) OldRSeqCPUAddr() usermem.Addr {
// * t.RSeqAvailable() == true.
// * The caller must be running on the task goroutine.
// * t's AddressSpace must be active.
-func (t *Task) SetOldRSeqCPUAddr(addr usermem.Addr) error {
+func (t *Task) SetOldRSeqCPUAddr(addr hostarch.Addr) error {
t.oldRSeqCPUAddr = addr
// Check that addr is writable.
@@ -221,7 +222,7 @@ func (t *Task) oldRSeqCopyOutCPU() error {
}
buf := t.CopyScratchBuffer(4)
- usermem.ByteOrder.PutUint32(buf, uint32(t.rseqCPU))
+ hostarch.ByteOrder.PutUint32(buf, uint32(t.rseqCPU))
_, err := t.CopyOutBytes(t.oldRSeqCPUAddr, buf)
return err
}
@@ -236,8 +237,8 @@ func (t *Task) rseqCopyOutCPU() error {
buf := t.CopyScratchBuffer(8)
// CPUIDStart and CPUID are the first two fields in linux.RSeq.
- usermem.ByteOrder.PutUint32(buf, uint32(t.rseqCPU)) // CPUIDStart
- usermem.ByteOrder.PutUint32(buf[4:], uint32(t.rseqCPU)) // CPUID
+ hostarch.ByteOrder.PutUint32(buf, uint32(t.rseqCPU)) // CPUIDStart
+ hostarch.ByteOrder.PutUint32(buf[4:], uint32(t.rseqCPU)) // CPUID
// N.B. This write is not atomic, but since this occurs on the task
// goroutine then as long as userspace uses a single-instruction read
// it can't see an invalid value.
@@ -251,8 +252,8 @@ func (t *Task) rseqCopyOutCPU() error {
func (t *Task) rseqClearCPU() error {
buf := t.CopyScratchBuffer(8)
// CPUIDStart and CPUID are the first two fields in linux.RSeq.
- usermem.ByteOrder.PutUint32(buf, 0) // CPUIDStart
- usermem.ByteOrder.PutUint32(buf[4:], linux.RSEQ_CPU_ID_UNINITIALIZED) // CPUID
+ hostarch.ByteOrder.PutUint32(buf, 0) // CPUIDStart
+ hostarch.ByteOrder.PutUint32(buf[4:], linux.RSEQ_CPU_ID_UNINITIALIZED) // CPUID
// N.B. This write is not atomic, but since this occurs on the task
// goroutine then as long as userspace uses a single-instruction read
// it can't see an invalid value.
@@ -305,7 +306,7 @@ func (t *Task) rseqAddrInterrupt() {
return
}
- critAddr := usermem.Addr(usermem.ByteOrder.Uint64(buf))
+ critAddr := hostarch.Addr(hostarch.ByteOrder.Uint64(buf))
if critAddr == 0 {
return
}
@@ -325,7 +326,7 @@ func (t *Task) rseqAddrInterrupt() {
return
}
- start := usermem.Addr(cs.Start)
+ start := hostarch.Addr(cs.Start)
critRange, ok := start.ToRange(cs.PostCommitOffset)
if !ok {
t.Debugf("Invalid start and offset in %+v", cs)
@@ -334,7 +335,7 @@ func (t *Task) rseqAddrInterrupt() {
return
}
- abort := usermem.Addr(cs.Abort)
+ abort := hostarch.Addr(cs.Abort)
if critRange.Contains(abort) {
t.Debugf("Abort in critical section in %+v", cs)
t.forceSignal(linux.SIGSEGV, false /* unconditional */)
@@ -353,7 +354,7 @@ func (t *Task) rseqAddrInterrupt() {
return
}
- sig := usermem.ByteOrder.Uint32(buf)
+ sig := hostarch.ByteOrder.Uint32(buf)
if sig != t.rseqSignature {
t.Debugf("Mismatched rseq signature %d != %d", sig, t.rseqSignature)
t.forceSignal(linux.SIGSEGV, false /* unconditional */)
@@ -376,7 +377,7 @@ func (t *Task) rseqAddrInterrupt() {
}
// Finally we can actually decide whether or not to restart.
- if !critRange.Contains(usermem.Addr(t.Arch().IP())) {
+ if !critRange.Contains(hostarch.Addr(t.Arch().IP())) {
return
}
@@ -386,7 +387,7 @@ func (t *Task) rseqAddrInterrupt() {
// Preconditions: The caller must be running on the task goroutine.
func (t *Task) oldRSeqInterrupt() {
r := t.tg.oldRSeqCritical.Load().(*OldRSeqCriticalRegion)
- if ip := t.Arch().IP(); r.CriticalSection.Contains(usermem.Addr(ip)) {
+ if ip := t.Arch().IP(); r.CriticalSection.Contains(hostarch.Addr(ip)) {
t.Debugf("Interrupted rseq critical section at %#x; restarting at %#x", ip, r.Restart)
t.Arch().SetIP(uintptr(r.Restart))
t.Arch().SetOldRSeqInterruptedIP(ip)
diff --git a/pkg/sentry/kernel/seccomp.go b/pkg/sentry/kernel/seccomp.go
index 8163a6132..a95e174a2 100644
--- a/pkg/sentry/kernel/seccomp.go
+++ b/pkg/sentry/kernel/seccomp.go
@@ -18,9 +18,9 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/bpf"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
const maxSyscallFilterInstructions = 1 << 15
@@ -35,11 +35,11 @@ func dataAsBPFInput(t *Task, d *linux.SeccompData) bpf.Input {
return bpf.InputBytes{
Data: buf,
// Go-marshal always uses the native byte order.
- Order: usermem.ByteOrder,
+ Order: hostarch.ByteOrder,
}
}
-func seccompSiginfo(t *Task, errno, sysno int32, ip usermem.Addr) *arch.SignalInfo {
+func seccompSiginfo(t *Task, errno, sysno int32, ip hostarch.Addr) *arch.SignalInfo {
si := &arch.SignalInfo{
Signo: int32(linux.SIGSYS),
Errno: errno,
@@ -56,7 +56,7 @@ func seccompSiginfo(t *Task, errno, sysno int32, ip usermem.Addr) *arch.SignalIn
// in because vsyscalls do not use the values in t.Arch().)
//
// Preconditions: The caller must be running on the task goroutine.
-func (t *Task) checkSeccompSyscall(sysno int32, args arch.SyscallArguments, ip usermem.Addr) linux.BPFAction {
+func (t *Task) checkSeccompSyscall(sysno int32, args arch.SyscallArguments, ip hostarch.Addr) linux.BPFAction {
result := linux.BPFAction(t.evaluateSyscallFilters(sysno, args, ip))
action := result & linux.SECCOMP_RET_ACTION
switch action {
@@ -102,7 +102,7 @@ func (t *Task) checkSeccompSyscall(sysno int32, args arch.SyscallArguments, ip u
return action
}
-func (t *Task) evaluateSyscallFilters(sysno int32, args arch.SyscallArguments, ip usermem.Addr) uint32 {
+func (t *Task) evaluateSyscallFilters(sysno int32, args arch.SyscallArguments, ip hostarch.Addr) uint32 {
data := linux.SeccompData{
Nr: sysno,
Arch: t.image.st.AuditNumber,
diff --git a/pkg/sentry/kernel/shm/BUILD b/pkg/sentry/kernel/shm/BUILD
index 073e14507..1c3c0794f 100644
--- a/pkg/sentry/kernel/shm/BUILD
+++ b/pkg/sentry/kernel/shm/BUILD
@@ -28,6 +28,7 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/hostarch",
"//pkg/log",
"//pkg/refs",
"//pkg/refsvfs2",
diff --git a/pkg/sentry/kernel/shm/shm.go b/pkg/sentry/kernel/shm/shm.go
index 92d60ba78..a73f1bdca 100644
--- a/pkg/sentry/kernel/shm/shm.go
+++ b/pkg/sentry/kernel/shm/shm.go
@@ -38,6 +38,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
@@ -47,7 +48,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/usage"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
// Key represents a shm segment key. Analogous to a file name.
@@ -197,13 +197,13 @@ func (r *Registry) FindOrCreate(ctx context.Context, pid int32, key Key, size ui
}
var sizeAligned uint64
- if val, ok := usermem.Addr(size).RoundUp(); ok {
+ if val, ok := hostarch.Addr(size).RoundUp(); ok {
sizeAligned = uint64(val)
} else {
return nil, syserror.EINVAL
}
- if numPages := sizeAligned / usermem.PageSize; r.totalPages+numPages > linux.SHMALL {
+ if numPages := sizeAligned / hostarch.PageSize; r.totalPages+numPages > linux.SHMALL {
// "... allocating a segment of the requested size would cause the
// system to exceed the system-wide limit on shared memory (SHMALL)."
// - man shmget(2)
@@ -232,7 +232,7 @@ func (r *Registry) newShm(ctx context.Context, pid int32, key Key, creator fs.Fi
panic(fmt.Sprintf("context.Context %T lacks non-nil value for key %T", ctx, pgalloc.CtxMemoryFileProvider))
}
- effectiveSize := uint64(usermem.Addr(size).MustRoundUp())
+ effectiveSize := uint64(hostarch.Addr(size).MustRoundUp())
fr, err := mfp.MemoryFile().Allocate(effectiveSize, usage.Anonymous)
if err != nil {
return nil, err
@@ -267,7 +267,7 @@ func (r *Registry) newShm(ctx context.Context, pid int32, key Key, creator fs.Fi
r.shms[id] = shm
r.keysToShms[key] = shm
- r.totalPages += effectiveSize / usermem.PageSize
+ r.totalPages += effectiveSize / hostarch.PageSize
return shm, nil
}
@@ -318,7 +318,7 @@ func (r *Registry) remove(s *Shm) {
}
delete(r.shms, s.ID)
- r.totalPages -= s.effectiveSize / usermem.PageSize
+ r.totalPages -= s.effectiveSize / hostarch.PageSize
}
// Release drops the self-reference of each active shm segment in the registry.
@@ -386,7 +386,7 @@ type Shm struct {
// effectiveSize of the segment, rounding up to the next page
// boundary. Immutable.
//
- // Invariant: effectiveSize must be a multiple of usermem.PageSize.
+ // Invariant: effectiveSize must be a multiple of hostarch.PageSize.
effectiveSize uint64
// fr is the offset into mfp.MemoryFile() that backs this contents of this
@@ -467,7 +467,7 @@ func (s *Shm) Msync(context.Context, memmap.MappableRange) error {
}
// AddMapping implements memmap.Mappable.AddMapping.
-func (s *Shm) AddMapping(ctx context.Context, _ memmap.MappingSpace, _ usermem.AddrRange, _ uint64, _ bool) error {
+func (s *Shm) AddMapping(ctx context.Context, _ memmap.MappingSpace, _ hostarch.AddrRange, _ uint64, _ bool) error {
s.mu.Lock()
defer s.mu.Unlock()
s.attachTime = ktime.NowFromContext(ctx)
@@ -482,7 +482,7 @@ func (s *Shm) AddMapping(ctx context.Context, _ memmap.MappingSpace, _ usermem.A
}
// RemoveMapping implements memmap.Mappable.RemoveMapping.
-func (s *Shm) RemoveMapping(ctx context.Context, _ memmap.MappingSpace, _ usermem.AddrRange, _ uint64, _ bool) {
+func (s *Shm) RemoveMapping(ctx context.Context, _ memmap.MappingSpace, _ hostarch.AddrRange, _ uint64, _ bool) {
s.mu.Lock()
defer s.mu.Unlock()
// RemoveMapping may be called during task exit, when ctx
@@ -503,12 +503,12 @@ func (s *Shm) RemoveMapping(ctx context.Context, _ memmap.MappingSpace, _ userme
}
// CopyMapping implements memmap.Mappable.CopyMapping.
-func (*Shm) CopyMapping(context.Context, memmap.MappingSpace, usermem.AddrRange, usermem.AddrRange, uint64, bool) error {
+func (*Shm) CopyMapping(context.Context, memmap.MappingSpace, hostarch.AddrRange, hostarch.AddrRange, uint64, bool) error {
return nil
}
// Translate implements memmap.Mappable.Translate.
-func (s *Shm) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) {
+func (s *Shm) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) {
var err error
if required.End > s.fr.Length() {
err = &memmap.BusError{syserror.EFAULT}
@@ -519,7 +519,7 @@ func (s *Shm) Translate(ctx context.Context, required, optional memmap.MappableR
Source: source,
File: s.mfp.MemoryFile(),
Offset: s.fr.Start + source.Start,
- Perms: usermem.AnyAccess,
+ Perms: hostarch.AnyAccess,
},
}, err
}
@@ -543,7 +543,7 @@ type AttachOpts struct {
//
// Postconditions: The returned MMapOpts are valid only as long as a reference
// continues to be held on s.
-func (s *Shm) ConfigureAttach(ctx context.Context, addr usermem.Addr, opts AttachOpts) (memmap.MMapOpts, error) {
+func (s *Shm) ConfigureAttach(ctx context.Context, addr hostarch.Addr, opts AttachOpts) (memmap.MMapOpts, error) {
s.mu.Lock()
defer s.mu.Unlock()
if s.pendingDestruction && s.ReadRefs() == 0 {
@@ -565,12 +565,12 @@ func (s *Shm) ConfigureAttach(ctx context.Context, addr usermem.Addr, opts Attac
Offset: 0,
Addr: addr,
Fixed: opts.Remap,
- Perms: usermem.AccessType{
+ Perms: hostarch.AccessType{
Read: true,
Write: !opts.Readonly,
Execute: opts.Execute,
},
- MaxPerms: usermem.AnyAccess,
+ MaxPerms: hostarch.AnyAccess,
Mappable: s,
MappingIdentity: s,
}, nil
diff --git a/pkg/sentry/kernel/syscalls.go b/pkg/sentry/kernel/syscalls.go
index 332bdb8e8..953d4310e 100644
--- a/pkg/sentry/kernel/syscalls.go
+++ b/pkg/sentry/kernel/syscalls.go
@@ -20,9 +20,9 @@ import (
"gvisor.dev/gvisor/pkg/abi"
"gvisor.dev/gvisor/pkg/bits"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/usermem"
)
// maxSyscallNum is the highest supported syscall number.
@@ -243,7 +243,7 @@ type SyscallTable struct {
// Emulate is a collection of instruction addresses to emulate. The
// keys are addresses, and the values are system call numbers.
- Emulate map[usermem.Addr]uintptr
+ Emulate map[hostarch.Addr]uintptr
// The function to call in case of a missing system call.
Missing MissingFn
@@ -316,7 +316,7 @@ func (s *SyscallTable) Init() {
}
if s.Emulate == nil {
// Ensure non-nil emulate table.
- s.Emulate = make(map[usermem.Addr]uintptr)
+ s.Emulate = make(map[hostarch.Addr]uintptr)
}
max := s.MaxSysno() // Checked during RegisterSyscallTable.
@@ -359,7 +359,7 @@ func (s *SyscallTable) LookupNo(name string) (uintptr, error) {
}
// LookupEmulate looks up an emulation syscall number.
-func (s *SyscallTable) LookupEmulate(addr usermem.Addr) (uintptr, bool) {
+func (s *SyscallTable) LookupEmulate(addr hostarch.Addr) (uintptr, bool) {
sysno, ok := s.Emulate[addr]
return sysno, ok
}
diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go
index 36141dd09..be1371855 100644
--- a/pkg/sentry/kernel/task.go
+++ b/pkg/sentry/kernel/task.go
@@ -21,6 +21,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/bpf"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/inet"
@@ -33,7 +34,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -470,7 +470,7 @@ type Task struct {
// ThreadID to 0, and wake any futex waiters.
//
// cleartid is exclusive to the task goroutine.
- cleartid usermem.Addr
+ cleartid hostarch.Addr
// This is mostly a fake cpumask just for sched_set/getaffinity as we
// don't really control the affinity.
@@ -540,12 +540,12 @@ type Task struct {
// oldRSeqCPUAddr is a pointer to the userspace old rseq CPU variable.
//
// oldRSeqCPUAddr is exclusive to the task goroutine.
- oldRSeqCPUAddr usermem.Addr
+ oldRSeqCPUAddr hostarch.Addr
// rseqAddr is a pointer to the userspace linux.RSeq structure.
//
// rseqAddr is exclusive to the task goroutine.
- rseqAddr usermem.Addr
+ rseqAddr hostarch.Addr
// rseqSignature is the signature that the rseq abort IP must be signed
// with.
@@ -575,7 +575,7 @@ type Task struct {
// robustList is a pointer to the head of the task's robust futex
// list.
- robustList usermem.Addr
+ robustList hostarch.Addr
// startTime is the real time at which the task started. It is set when
// a Task is created or invokes execve(2).
@@ -587,6 +587,12 @@ type Task struct {
//
// kcov is exclusive to the task goroutine.
kcov *Kcov
+
+ // cgroups is the set of cgroups this task belongs to. This may be empty if
+ // no cgroup controllers are enabled. Protected by mu.
+ //
+ // +checklocks:mu
+ cgroups map[Cgroup]struct{}
}
func (t *Task) savePtraceTracer() *Task {
@@ -652,7 +658,7 @@ func (t *Task) Kernel() *Kernel {
// SetClearTID sets t's cleartid.
//
// Preconditions: The caller must be running on the task goroutine.
-func (t *Task) SetClearTID(addr usermem.Addr) {
+func (t *Task) SetClearTID(addr hostarch.Addr) {
t.cleartid = addr
}
diff --git a/pkg/sentry/kernel/task_cgroup.go b/pkg/sentry/kernel/task_cgroup.go
new file mode 100644
index 000000000..25d2504fa
--- /dev/null
+++ b/pkg/sentry/kernel/task_cgroup.go
@@ -0,0 +1,138 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package kernel
+
+import (
+ "bytes"
+ "fmt"
+ "sort"
+ "strings"
+
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/syserror"
+)
+
+// EnterInitialCgroups moves t into an initial set of cgroups.
+//
+// Precondition: t isn't in any cgroups yet, t.cgroups is empty.
+//
+// +checklocksignore parent.mu is conditionally acquired.
+func (t *Task) EnterInitialCgroups(parent *Task) {
+ var inherit map[Cgroup]struct{} // Stays nil when parent is nil.
+ if parent != nil {
+ parent.mu.Lock()
+ defer parent.mu.Unlock()
+ inherit = parent.cgroups
+ }
+ joinSet := t.k.cgroupRegistry.computeInitialGroups(inherit)
+
+ t.mu.Lock()
+ defer t.mu.Unlock()
+ // Transfer ownership of joinSet refs to the task's cgroups set.
+ t.cgroups = joinSet
+ for c := range t.cgroups {
+ // Since t isn't in any cgroup yet, we can skip the check against
+ // existing cgroups.
+ c.Enter(t)
+ }
+}
+
+// EnterCgroup moves t into c, or returns EBUSY if t is already in a cgroup sharing a controller type with c.
+func (t *Task) EnterCgroup(c Cgroup) error {
+ newControllers := make(map[CgroupControllerType]struct{})
+ for _, ctl := range c.Controllers() {
+ newControllers[ctl.Type()] = struct{}{}
+ }
+
+ t.mu.Lock()
+ defer t.mu.Unlock()
+
+ for oldCG := range t.cgroups {
+ for _, oldCtl := range oldCG.Controllers() {
+ if _, ok := newControllers[oldCtl.Type()]; ok {
+ // Already in a cgroup with the same controller as one of the
+ // new ones. Requires migration between cgroups.
+ //
+ // TODO(b/183137098): Implement cgroup migration.
+ log.Warningf("Cgroup migration is not implemented")
+ return syserror.EBUSY
+ }
+ }
+ }
+
+ // No migration required.
+ t.enterCgroupLocked(c)
+
+ return nil
+}
+
+// +checklocks:t.mu
+func (t *Task) enterCgroupLocked(c Cgroup) {
+ c.IncRef() // Reference held for the t.cgroups entry added below; leaveCgroupLocked calls c.decRef() when removing it.
+ t.cgroups[c] = struct{}{}
+ c.Enter(t)
+}
+
+// LeaveCgroups removes t from all of its cgroups.
+func (t *Task) LeaveCgroups() {
+ t.mu.Lock()
+ defer t.mu.Unlock()
+ for c := range t.cgroups {
+ t.leaveCgroupLocked(c) // Deletes c from t.cgroups; removing entries during a range loop is permitted by the Go spec.
+ }
+}
+
+// +checklocks:t.mu
+func (t *Task) leaveCgroupLocked(c Cgroup) {
+ c.Leave(t)
+ delete(t.cgroups, c)
+ c.decRef() // Presumably balances the c.IncRef() in enterCgroupLocked; done last, once c is no longer in t.cgroups.
+}
+
+// taskCgroupEntry represents a line in /proc/<pid>/cgroup, and is used to
+// format a cgroup for display.
+type taskCgroupEntry struct {
+ hierarchyID uint32 // First field of the line; also the sort key in GenerateProcTaskCgroup.
+ controllers string // Second field: comma-joined controller type names.
+ path string // Third field: the value returned by the cgroup's Path().
+}
+
+// GenerateProcTaskCgroup writes the contents of /proc/<pid>/cgroup for t to buf, one "hierarchyID:controllers:path" line per cgroup.
+func (t *Task) GenerateProcTaskCgroup(buf *bytes.Buffer) {
+ t.mu.Lock()
+ defer t.mu.Unlock()
+
+ cgEntries := make([]taskCgroupEntry, 0, len(t.cgroups))
+ for c := range t.cgroups {
+ ctls := c.Controllers()
+ ctlNames := make([]string, 0, len(ctls))
+ for _, ctl := range ctls {
+ ctlNames = append(ctlNames, string(ctl.Type()))
+ }
+
+ cgEntries = append(cgEntries, taskCgroupEntry{
+ // Note: We're guaranteed to have at least one controller, and all
+ // controllers are guaranteed to be on the same hierarchy.
+ hierarchyID: ctls[0].HierarchyID(),
+ controllers: strings.Join(ctlNames, ","),
+ path: c.Path(),
+ })
+ }
+
+ sort.Slice(cgEntries, func(i, j int) bool { return cgEntries[i].hierarchyID > cgEntries[j].hierarchyID }) // Descending hierarchy ID; map iteration order is random, so sorting makes the output deterministic.
+ for _, cgE := range cgEntries {
+ fmt.Fprintf(buf, "%d:%s:%s\n", cgE.hierarchyID, cgE.controllers, cgE.path)
+ }
+}
diff --git a/pkg/sentry/kernel/task_clone.go b/pkg/sentry/kernel/task_clone.go
index f305e69c0..405771f3f 100644
--- a/pkg/sentry/kernel/task_clone.go
+++ b/pkg/sentry/kernel/task_clone.go
@@ -20,6 +20,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/bpf"
"gvisor.dev/gvisor/pkg/cleanup"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/inet"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
@@ -85,12 +86,12 @@ type CloneOptions struct {
// Stack is the initial stack pointer of the new task. If Stack is 0, the
// new task will start with the same stack pointer as its parent.
- Stack usermem.Addr
+ Stack hostarch.Addr
// If SetTLS is true, set the new task's TLS (thread-local storage)
// descriptor to TLS. If SetTLS is false, TLS is ignored.
SetTLS bool
- TLS usermem.Addr
+ TLS hostarch.Addr
// If ChildClearTID is true, when the child exits, 0 is written to the
// address ChildTID in the child's memory, and if the write is successful a
@@ -101,7 +102,7 @@ type CloneOptions struct {
// Linux, failed writes are silently ignored.)
ChildClearTID bool
ChildSetTID bool
- ChildTID usermem.Addr
+ ChildTID hostarch.Addr
// If ParentSetTID is true, the child's thread ID (in the parent's PID
// namespace) is written to address ParentTID in the parent's memory. (As
@@ -112,7 +113,7 @@ type CloneOptions struct {
// and child's memory, but this is a documentation error fixed by
// 87ab04792ced ("clone.2: Fix description of CLONE_PARENT_SETTID").
ParentSetTID bool
- ParentTID usermem.Addr
+ ParentTID hostarch.Addr
// If Vfork is true, place the parent in vforkStop until the cloned task
// releases its TaskImage.
@@ -268,7 +269,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
}
tg := t.tg
- rseqAddr := usermem.Addr(0)
+ rseqAddr := hostarch.Addr(0)
rseqSignature := uint32(0)
if opts.NewThreadGroup {
if tg.mounts != nil {
diff --git a/pkg/sentry/kernel/task_exit.go b/pkg/sentry/kernel/task_exit.go
index ad59e4f60..b1af1a7ef 100644
--- a/pkg/sentry/kernel/task_exit.go
+++ b/pkg/sentry/kernel/task_exit.go
@@ -275,6 +275,10 @@ func (*runExitMain) execute(t *Task) taskRunState {
t.fsContext.DecRef(t)
t.fdTable.DecRef(t)
+ // Detach task from all cgroups. This must happen before potentially the
+ // last ref to the cgroupfs mount is dropped below.
+ t.LeaveCgroups()
+
t.mu.Lock()
if t.mountNamespaceVFS2 != nil {
t.mountNamespaceVFS2.DecRef(t)
diff --git a/pkg/sentry/kernel/task_futex.go b/pkg/sentry/kernel/task_futex.go
index 195c7da9b..4dc41b82b 100644
--- a/pkg/sentry/kernel/task_futex.go
+++ b/pkg/sentry/kernel/task_futex.go
@@ -16,6 +16,7 @@ package kernel
import (
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/marshal/primitive"
"gvisor.dev/gvisor/pkg/sentry/kernel/futex"
"gvisor.dev/gvisor/pkg/usermem"
@@ -30,33 +31,33 @@ func (t *Task) Futex() *futex.Manager {
}
// SwapUint32 implements futex.Target.SwapUint32.
-func (t *Task) SwapUint32(addr usermem.Addr, new uint32) (uint32, error) {
+func (t *Task) SwapUint32(addr hostarch.Addr, new uint32) (uint32, error) {
return t.MemoryManager().SwapUint32(t, addr, new, usermem.IOOpts{
AddressSpaceActive: true,
})
}
// CompareAndSwapUint32 implements futex.Target.CompareAndSwapUint32.
-func (t *Task) CompareAndSwapUint32(addr usermem.Addr, old, new uint32) (uint32, error) {
+func (t *Task) CompareAndSwapUint32(addr hostarch.Addr, old, new uint32) (uint32, error) {
return t.MemoryManager().CompareAndSwapUint32(t, addr, old, new, usermem.IOOpts{
AddressSpaceActive: true,
})
}
// LoadUint32 implements futex.Target.LoadUint32.
-func (t *Task) LoadUint32(addr usermem.Addr) (uint32, error) {
+func (t *Task) LoadUint32(addr hostarch.Addr) (uint32, error) {
return t.MemoryManager().LoadUint32(t, addr, usermem.IOOpts{
AddressSpaceActive: true,
})
}
// GetSharedKey implements futex.Target.GetSharedKey.
-func (t *Task) GetSharedKey(addr usermem.Addr) (futex.Key, error) {
+func (t *Task) GetSharedKey(addr hostarch.Addr) (futex.Key, error) {
return t.MemoryManager().GetSharedFutexKey(t, addr)
}
// GetRobustList returns the robust futex list for the task.
-func (t *Task) GetRobustList() usermem.Addr {
+func (t *Task) GetRobustList() hostarch.Addr {
t.mu.Lock()
addr := t.robustList
t.mu.Unlock()
@@ -64,7 +65,7 @@ func (t *Task) GetRobustList() usermem.Addr {
}
// SetRobustList sets the robust futex list for the task.
-func (t *Task) SetRobustList(addr usermem.Addr) {
+func (t *Task) SetRobustList(addr hostarch.Addr) {
t.mu.Lock()
t.robustList = addr
t.mu.Unlock()
@@ -84,28 +85,28 @@ func (t *Task) exitRobustList() {
}
var rl linux.RobustListHead
- if _, err := rl.CopyIn(t, usermem.Addr(addr)); err != nil {
+ if _, err := rl.CopyIn(t, hostarch.Addr(addr)); err != nil {
return
}
next := primitive.Uint64(rl.List)
done := 0
- var pendingLockAddr usermem.Addr
+ var pendingLockAddr hostarch.Addr
if rl.ListOpPending != 0 {
- pendingLockAddr = usermem.Addr(rl.ListOpPending + rl.FutexOffset)
+ pendingLockAddr = hostarch.Addr(rl.ListOpPending + rl.FutexOffset)
}
// Wake up normal elements.
- for usermem.Addr(next) != addr {
+ for hostarch.Addr(next) != addr {
// We traverse to the next element of the list before we
// actually wake anything. This prevents the race where waking
// this futex causes a modification of the list.
- thisLockAddr := usermem.Addr(uint64(next) + rl.FutexOffset)
+ thisLockAddr := hostarch.Addr(uint64(next) + rl.FutexOffset)
// Try to decode the next element in the list before waking the
// current futex. But don't check the error until after we've
// woken the current futex. Linux does it in this order too
- _, nextErr := next.CopyIn(t, usermem.Addr(next))
+ _, nextErr := next.CopyIn(t, hostarch.Addr(next))
// Wakeup the current futex if it's not pending.
if thisLockAddr != pendingLockAddr {
@@ -133,7 +134,7 @@ func (t *Task) exitRobustList() {
}
// wakeRobustListOne wakes a single futex from the robust list.
-func (t *Task) wakeRobustListOne(addr usermem.Addr) {
+func (t *Task) wakeRobustListOne(addr hostarch.Addr) {
// Bit 0 in address signals PI futex.
pi := addr&1 == 1
addr = addr &^ 1
diff --git a/pkg/sentry/kernel/task_image.go b/pkg/sentry/kernel/task_image.go
index ce5fbd299..bd5543d4e 100644
--- a/pkg/sentry/kernel/task_image.go
+++ b/pkg/sentry/kernel/task_image.go
@@ -19,12 +19,12 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel/futex"
"gvisor.dev/gvisor/pkg/sentry/loader"
"gvisor.dev/gvisor/pkg/sentry/mm"
"gvisor.dev/gvisor/pkg/syserr"
- "gvisor.dev/gvisor/pkg/usermem"
)
var errNoSyscalls = syserr.New("no syscall table found", linux.ENOEXEC)
@@ -129,7 +129,7 @@ func (t *Task) Stack() *arch.Stack {
return &arch.Stack{
Arch: t.Arch(),
IO: t.MemoryManager(),
- Bottom: usermem.Addr(t.Arch().Stack()),
+ Bottom: hostarch.Addr(t.Arch().Stack()),
}
}
diff --git a/pkg/sentry/kernel/task_log.go b/pkg/sentry/kernel/task_log.go
index c70e5e6ce..72b9a0384 100644
--- a/pkg/sentry/kernel/task_log.go
+++ b/pkg/sentry/kernel/task_log.go
@@ -20,6 +20,7 @@ import (
"sort"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -108,9 +109,9 @@ func (t *Task) debugDumpStack() {
return
}
t.Debugf("Stack:")
- start := usermem.Addr(t.Arch().Stack())
+ start := hostarch.Addr(t.Arch().Stack())
// Round addr down to a 16-byte boundary.
- start &= ^usermem.Addr(15)
+ start &= ^hostarch.Addr(15)
// Print 16 bytes per line, one byte at a time.
for offset := uint64(0); offset < maxStackDebugBytes; offset += 16 {
addr, ok := start.AddLength(offset)
@@ -127,7 +128,7 @@ func (t *Task) debugDumpStack() {
t.Debugf("%x: % x", addr, data[:n])
}
if err != nil {
- t.Debugf("Error reading stack at address %x: %v", addr+usermem.Addr(n), err)
+ t.Debugf("Error reading stack at address %x: %v", addr+hostarch.Addr(n), err)
break
}
}
@@ -147,9 +148,9 @@ func (t *Task) debugDumpCode() {
}
t.Debugf("Code:")
// Print code on both sides of the instruction register.
- start := usermem.Addr(t.Arch().IP()) - maxCodeDebugBytes/2
+ start := hostarch.Addr(t.Arch().IP()) - maxCodeDebugBytes/2
// Round addr down to a 16-byte boundary.
- start &= ^usermem.Addr(15)
+ start &= ^hostarch.Addr(15)
// Print 16 bytes per line, one byte at a time.
for offset := uint64(0); offset < maxCodeDebugBytes; offset += 16 {
addr, ok := start.AddLength(offset)
@@ -166,7 +167,7 @@ func (t *Task) debugDumpCode() {
t.Debugf("%x: % x", addr, data[:n])
}
if err != nil {
- t.Debugf("Error reading stack at address %x: %v", addr+usermem.Addr(n), err)
+ t.Debugf("Error reading stack at address %x: %v", addr+hostarch.Addr(n), err)
break
}
}
diff --git a/pkg/sentry/kernel/task_run.go b/pkg/sentry/kernel/task_run.go
index 3ccecf4b6..068f25af1 100644
--- a/pkg/sentry/kernel/task_run.go
+++ b/pkg/sentry/kernel/task_run.go
@@ -23,13 +23,13 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/goid"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/hostcpu"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/platform"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
// A taskRunState is a reified state in the task state machine. See README.md
@@ -148,7 +148,7 @@ func (*runApp) handleCPUIDInstruction(t *Task) error {
region := trace.StartRegion(t.traceContext, cpuidRegion)
expected := arch.CPUIDInstruction[:]
found := make([]byte, len(expected))
- _, err := t.CopyInBytes(usermem.Addr(t.Arch().IP()), found)
+ _, err := t.CopyInBytes(hostarch.Addr(t.Arch().IP()), found)
if err == nil && bytes.Equal(expected, found) {
// Skip the cpuid instruction.
t.Arch().CPUIDEmulate(t)
@@ -307,8 +307,8 @@ func (app *runApp) execute(t *Task) taskRunState {
// normally.
if at.Any() {
region := trace.StartRegion(t.traceContext, faultRegion)
- addr := usermem.Addr(info.Addr())
- err := t.MemoryManager().HandleUserFault(t, addr, at, usermem.Addr(t.Arch().Stack()))
+ addr := hostarch.Addr(info.Addr())
+ err := t.MemoryManager().HandleUserFault(t, addr, at, hostarch.Addr(t.Arch().Stack()))
region.End()
if err == nil {
// The fault was handled appropriately.
diff --git a/pkg/sentry/kernel/task_signals.go b/pkg/sentry/kernel/task_signals.go
index 75af3af79..c2b9fc08f 100644
--- a/pkg/sentry/kernel/task_signals.go
+++ b/pkg/sentry/kernel/task_signals.go
@@ -23,11 +23,11 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/eventchannel"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
ucspb "gvisor.dev/gvisor/pkg/sentry/kernel/uncaught_signal_go_proto"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -243,7 +243,7 @@ func (t *Task) deliverSignalToHandler(info *arch.SignalInfo, act arch.SignalAct)
// Are executing on the main stack,
// or the provided alternate stack?
- sp := usermem.Addr(t.Arch().Stack())
+ sp := hostarch.Addr(t.Arch().Stack())
// N.B. This is a *copy* of the alternate stack that the user's signal
// handler expects to see in its ucontext (even if it's not in use).
@@ -251,7 +251,7 @@ func (t *Task) deliverSignalToHandler(info *arch.SignalInfo, act arch.SignalAct)
if act.IsOnStack() && alt.IsEnabled() {
alt.SetOnStack()
if !alt.Contains(sp) {
- sp = usermem.Addr(alt.Top())
+ sp = hostarch.Addr(alt.Top())
}
}
@@ -652,7 +652,7 @@ func (t *Task) SignalStack() arch.SignalStack {
// onSignalStack returns true if the task is executing on the given signal stack.
func (t *Task) onSignalStack(alt arch.SignalStack) bool {
- sp := usermem.Addr(t.Arch().Stack())
+ sp := hostarch.Addr(t.Arch().Stack())
return alt.Contains(sp)
}
@@ -720,7 +720,7 @@ func (tg *ThreadGroup) SetSignalAct(sig linux.Signal, actptr *arch.SignalAct) (a
// CopyOutSignalAct converts the given SignalAct into an architecture-specific
// type and then copies it out to task memory.
-func (t *Task) CopyOutSignalAct(addr usermem.Addr, s *arch.SignalAct) error {
+func (t *Task) CopyOutSignalAct(addr hostarch.Addr, s *arch.SignalAct) error {
n := t.Arch().NewSignalAct()
n.SerializeFrom(s)
_, err := n.CopyOut(t, addr)
@@ -729,7 +729,7 @@ func (t *Task) CopyOutSignalAct(addr usermem.Addr, s *arch.SignalAct) error {
// CopyInSignalAct copies an architecture-specific sigaction type from task
// memory and then converts it into a SignalAct.
-func (t *Task) CopyInSignalAct(addr usermem.Addr) (arch.SignalAct, error) {
+func (t *Task) CopyInSignalAct(addr hostarch.Addr) (arch.SignalAct, error) {
n := t.Arch().NewSignalAct()
var s arch.SignalAct
if _, err := n.CopyIn(t, addr); err != nil {
@@ -741,7 +741,7 @@ func (t *Task) CopyInSignalAct(addr usermem.Addr) (arch.SignalAct, error) {
// CopyOutSignalStack converts the given SignalStack into an
// architecture-specific type and then copies it out to task memory.
-func (t *Task) CopyOutSignalStack(addr usermem.Addr, s *arch.SignalStack) error {
+func (t *Task) CopyOutSignalStack(addr hostarch.Addr, s *arch.SignalStack) error {
n := t.Arch().NewSignalStack()
n.SerializeFrom(s)
_, err := n.CopyOut(t, addr)
@@ -750,7 +750,7 @@ func (t *Task) CopyOutSignalStack(addr usermem.Addr, s *arch.SignalStack) error
// CopyInSignalStack copies an architecture-specific stack_t from task memory
// and then converts it into a SignalStack.
-func (t *Task) CopyInSignalStack(addr usermem.Addr) (arch.SignalStack, error) {
+func (t *Task) CopyInSignalStack(addr hostarch.Addr) (arch.SignalStack, error) {
n := t.Arch().NewSignalStack()
var s arch.SignalStack
if _, err := n.CopyIn(t, addr); err != nil {
diff --git a/pkg/sentry/kernel/task_start.go b/pkg/sentry/kernel/task_start.go
index 36e1384f1..32031cd70 100644
--- a/pkg/sentry/kernel/task_start.go
+++ b/pkg/sentry/kernel/task_start.go
@@ -17,6 +17,7 @@ package kernel
import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/inet"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
@@ -25,7 +26,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/usage"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
// TaskConfig defines the configuration of a new Task (see below).
@@ -86,7 +86,7 @@ type TaskConfig struct {
MountNamespaceVFS2 *vfs.MountNamespace
// RSeqAddr is a pointer to the userspace linux.RSeq structure.
- RSeqAddr usermem.Addr
+ RSeqAddr hostarch.Addr
// RSeqSignature is the signature that the rseq abort IP must be signed
// with.
@@ -151,6 +151,7 @@ func (ts *TaskSet) newTask(cfg *TaskConfig) (*Task, error) {
rseqSignature: cfg.RSeqSignature,
futexWaiter: futex.NewWaiter(),
containerID: cfg.ContainerID,
+ cgroups: make(map[Cgroup]struct{}),
}
t.creds.Store(cfg.Credentials)
t.endStopCond.L = &t.tg.signalHandlers.mu
@@ -189,6 +190,10 @@ func (ts *TaskSet) newTask(cfg *TaskConfig) (*Task, error) {
t.parent.children[t] = struct{}{}
}
+ if VFS2Enabled {
+ t.EnterInitialCgroups(t.parent)
+ }
+
if tg.leader == nil {
// New thread group.
tg.leader = t
diff --git a/pkg/sentry/kernel/task_syscall.go b/pkg/sentry/kernel/task_syscall.go
index 2e84bd88a..2c658d001 100644
--- a/pkg/sentry/kernel/task_syscall.go
+++ b/pkg/sentry/kernel/task_syscall.go
@@ -22,12 +22,12 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/bits"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/marshal"
"gvisor.dev/gvisor/pkg/metric"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
var vsyscallCount = metric.MustCreateNewUint64Metric("/kernel/vsyscall_count", false /* sync */, "Number of times vsyscalls were invoked by the application")
@@ -153,7 +153,7 @@ func (t *Task) doSyscall() taskRunState {
// Check seccomp filters. The nil check is for performance (as seccomp use
// is rare), not needed for correctness.
if t.syscallFilters.Load() != nil {
- switch r := t.checkSeccompSyscall(int32(sysno), args, usermem.Addr(t.Arch().IP())); r {
+ switch r := t.checkSeccompSyscall(int32(sysno), args, hostarch.Addr(t.Arch().IP())); r {
case linux.SECCOMP_RET_ERRNO, linux.SECCOMP_RET_TRAP:
t.Debugf("Syscall %d: denied by seccomp", sysno)
return (*runSyscallExit)(nil)
@@ -283,12 +283,12 @@ func (*runSyscallExit) execute(t *Task) taskRunState {
// doVsyscall is the entry point for a vsyscall invocation of syscall sysno, as
// indicated by an execution fault at address addr. doVsyscall returns the
// task's next run state.
-func (t *Task) doVsyscall(addr usermem.Addr, sysno uintptr) taskRunState {
+func (t *Task) doVsyscall(addr hostarch.Addr, sysno uintptr) taskRunState {
vsyscallCount.Increment()
// Grab the caller up front, to make sure there's a sensible stack.
caller := t.Arch().Native(uintptr(0))
- if _, err := caller.CopyIn(t, usermem.Addr(t.Arch().Stack())); err != nil {
+ if _, err := caller.CopyIn(t, hostarch.Addr(t.Arch().Stack())); err != nil {
t.Debugf("vsyscall %d: error reading return address from stack: %v", sysno, err)
t.forceSignal(linux.SIGSEGV, false /* unconditional */)
t.SendSignal(SignalInfoPriv(linux.SIGSEGV))
@@ -322,7 +322,7 @@ func (t *Task) doVsyscall(addr usermem.Addr, sysno uintptr) taskRunState {
}
type runVsyscallAfterPtraceEventSeccomp struct {
- addr usermem.Addr
+ addr hostarch.Addr
sysno uintptr
caller marshal.Marshallable
}
@@ -337,7 +337,7 @@ func (r *runVsyscallAfterPtraceEventSeccomp) execute(t *Task) taskRunState {
// currently emulated call. ... The tracer MUST NOT modify rip or rsp." -
// Documentation/prctl/seccomp_filter.txt. On Linux, changing orig_ax or ip
// causes do_exit(SIGSYS), and changing sp is ignored.
- if (sysno != ^uintptr(0) && sysno != r.sysno) || usermem.Addr(t.Arch().IP()) != r.addr {
+ if (sysno != ^uintptr(0) && sysno != r.sysno) || hostarch.Addr(t.Arch().IP()) != r.addr {
t.PrepareExit(ExitStatus{Signo: int(linux.SIGSYS)})
return (*runExit)(nil)
}
diff --git a/pkg/sentry/kernel/task_usermem.go b/pkg/sentry/kernel/task_usermem.go
index 94dabbcd8..fc6d9438a 100644
--- a/pkg/sentry/kernel/task_usermem.go
+++ b/pkg/sentry/kernel/task_usermem.go
@@ -19,6 +19,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/mm"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
@@ -27,7 +28,7 @@ import (
// MAX_RW_COUNT is the maximum size in bytes of a single read or write.
// Reads and writes that exceed this size may be silently truncated.
// (Linux: include/linux/fs.h:MAX_RW_COUNT)
-var MAX_RW_COUNT = int(usermem.Addr(math.MaxInt32).RoundDown())
+var MAX_RW_COUNT = int(hostarch.Addr(math.MaxInt32).RoundDown())
// Activate ensures that the task has an active address space.
func (t *Task) Activate() {
@@ -49,7 +50,7 @@ func (t *Task) Deactivate() {
// data without reflection and pass in a byte slice.
//
// This Task's AddressSpace must be active.
-func (t *Task) CopyInBytes(addr usermem.Addr, dst []byte) (int, error) {
+func (t *Task) CopyInBytes(addr hostarch.Addr, dst []byte) (int, error) {
return t.MemoryManager().CopyIn(t, addr, dst, usermem.IOOpts{
AddressSpaceActive: true,
})
@@ -59,7 +60,7 @@ func (t *Task) CopyInBytes(addr usermem.Addr, dst []byte) (int, error) {
// data without reflection and pass in a byte slice.
//
// This Task's AddressSpace must be active.
-func (t *Task) CopyOutBytes(addr usermem.Addr, src []byte) (int, error) {
+func (t *Task) CopyOutBytes(addr hostarch.Addr, src []byte) (int, error) {
return t.MemoryManager().CopyOut(t, addr, src, usermem.IOOpts{
AddressSpaceActive: true,
})
@@ -70,7 +71,7 @@ func (t *Task) CopyOutBytes(addr usermem.Addr, src []byte) (int, error) {
// user memory that is unmapped or not readable by the user.
//
// This Task's AddressSpace must be active.
-func (t *Task) CopyInString(addr usermem.Addr, maxlen int) (string, error) {
+func (t *Task) CopyInString(addr hostarch.Addr, maxlen int) (string, error) {
return usermem.CopyStringIn(t, t.MemoryManager(), addr, maxlen, usermem.IOOpts{
AddressSpaceActive: true,
})
@@ -90,7 +91,7 @@ func (t *Task) CopyInString(addr usermem.Addr, maxlen int) (string, error) {
// { "abc" } => 4 (3 for length, 1 for elements)
//
// This Task's AddressSpace must be active.
-func (t *Task) CopyInVector(addr usermem.Addr, maxElemSize, maxTotalSize int) ([]string, error) {
+func (t *Task) CopyInVector(addr hostarch.Addr, maxElemSize, maxTotalSize int) ([]string, error) {
var v []string
for {
argAddr := t.Arch().Native(0)
@@ -109,12 +110,12 @@ func (t *Task) CopyInVector(addr usermem.Addr, maxElemSize, maxTotalSize int) ([
if maxTotalSize < thisMax {
thisMax = maxTotalSize
}
- arg, err := t.CopyInString(usermem.Addr(t.Arch().Value(argAddr)), thisMax)
+ arg, err := t.CopyInString(hostarch.Addr(t.Arch().Value(argAddr)), thisMax)
if err != nil {
return v, err
}
v = append(v, arg)
- addr += usermem.Addr(t.Arch().Width())
+ addr += hostarch.Addr(t.Arch().Width())
maxTotalSize -= len(arg) + 1
}
return v, nil
@@ -126,7 +127,7 @@ func (t *Task) CopyInVector(addr usermem.Addr, maxElemSize, maxTotalSize int) ([
// Preconditions: Same as usermem.IO.CopyOut, plus:
// * The caller must be running on the task goroutine.
// * t's AddressSpace must be active.
-func (t *Task) CopyOutIovecs(addr usermem.Addr, src usermem.AddrRangeSeq) error {
+func (t *Task) CopyOutIovecs(addr hostarch.Addr, src hostarch.AddrRangeSeq) error {
switch t.Arch().Width() {
case 8:
const itemLen = 16
@@ -137,8 +138,8 @@ func (t *Task) CopyOutIovecs(addr usermem.Addr, src usermem.AddrRangeSeq) error
b := t.CopyScratchBuffer(itemLen)
for ; !src.IsEmpty(); src = src.Tail() {
ar := src.Head()
- usermem.ByteOrder.PutUint64(b[0:8], uint64(ar.Start))
- usermem.ByteOrder.PutUint64(b[8:16], uint64(ar.Length()))
+ hostarch.ByteOrder.PutUint64(b[0:8], uint64(ar.Start))
+ hostarch.ByteOrder.PutUint64(b[8:16], uint64(ar.Length()))
if _, err := t.CopyOutBytes(addr, b); err != nil {
return err
}
@@ -153,8 +154,8 @@ func (t *Task) CopyOutIovecs(addr usermem.Addr, src usermem.AddrRangeSeq) error
}
// CopyInIovecs copies an array of numIovecs struct iovecs from the memory
-// mapped at addr, converts them to usermem.AddrRanges, and returns them as a
-// usermem.AddrRangeSeq.
+// mapped at addr, converts them to hostarch.AddrRanges, and returns them as a
+// hostarch.AddrRangeSeq.
//
// CopyInIovecs shares the following properties with Linux's
// lib/iov_iter.c:import_iovec() => fs/read_write.c:rw_copy_check_uvector():
@@ -175,42 +176,42 @@ func (t *Task) CopyOutIovecs(addr usermem.Addr, src usermem.AddrRangeSeq) error
// Preconditions: Same as usermem.IO.CopyIn, plus:
// * The caller must be running on the task goroutine.
// * t's AddressSpace must be active.
-func (t *Task) CopyInIovecs(addr usermem.Addr, numIovecs int) (usermem.AddrRangeSeq, error) {
+func (t *Task) CopyInIovecs(addr hostarch.Addr, numIovecs int) (hostarch.AddrRangeSeq, error) {
if numIovecs == 0 {
- return usermem.AddrRangeSeq{}, nil
+ return hostarch.AddrRangeSeq{}, nil
}
- var dst []usermem.AddrRange
+ var dst []hostarch.AddrRange
if numIovecs > 1 {
- dst = make([]usermem.AddrRange, 0, numIovecs)
+ dst = make([]hostarch.AddrRange, 0, numIovecs)
}
switch t.Arch().Width() {
case 8:
const itemLen = 16
if _, ok := addr.AddLength(uint64(numIovecs) * itemLen); !ok {
- return usermem.AddrRangeSeq{}, syserror.EFAULT
+ return hostarch.AddrRangeSeq{}, syserror.EFAULT
}
b := t.CopyScratchBuffer(itemLen)
for i := 0; i < numIovecs; i++ {
if _, err := t.CopyInBytes(addr, b); err != nil {
- return usermem.AddrRangeSeq{}, err
+ return hostarch.AddrRangeSeq{}, err
}
- base := usermem.Addr(usermem.ByteOrder.Uint64(b[0:8]))
- length := usermem.ByteOrder.Uint64(b[8:16])
+ base := hostarch.Addr(hostarch.ByteOrder.Uint64(b[0:8]))
+ length := hostarch.ByteOrder.Uint64(b[8:16])
if length > math.MaxInt64 {
- return usermem.AddrRangeSeq{}, syserror.EINVAL
+ return hostarch.AddrRangeSeq{}, syserror.EINVAL
}
ar, ok := t.MemoryManager().CheckIORange(base, int64(length))
if !ok {
- return usermem.AddrRangeSeq{}, syserror.EFAULT
+ return hostarch.AddrRangeSeq{}, syserror.EFAULT
}
if numIovecs == 1 {
// Special case to avoid allocating dst.
- return usermem.AddrRangeSeqOf(ar).TakeFirst(MAX_RW_COUNT), nil
+ return hostarch.AddrRangeSeqOf(ar).TakeFirst(MAX_RW_COUNT), nil
}
dst = append(dst, ar)
@@ -218,7 +219,7 @@ func (t *Task) CopyInIovecs(addr usermem.Addr, numIovecs int) (usermem.AddrRange
}
default:
- return usermem.AddrRangeSeq{}, syserror.ENOSYS
+ return hostarch.AddrRangeSeq{}, syserror.ENOSYS
}
// Truncate to MAX_RW_COUNT.
@@ -226,13 +227,13 @@ func (t *Task) CopyInIovecs(addr usermem.Addr, numIovecs int) (usermem.AddrRange
for i := range dst {
dstlen := uint64(dst[i].Length())
if rem := uint64(MAX_RW_COUNT) - total; rem < dstlen {
- dst[i].End -= usermem.Addr(dstlen - rem)
+ dst[i].End -= hostarch.Addr(dstlen - rem)
dstlen = rem
}
total += dstlen
}
- return usermem.AddrRangeSeqFromSlice(dst), nil
+ return hostarch.AddrRangeSeqFromSlice(dst), nil
}
// SingleIOSequence returns a usermem.IOSequence representing [addr,
@@ -245,7 +246,7 @@ func (t *Task) CopyInIovecs(addr usermem.Addr, numIovecs int) (usermem.AddrRange
// write syscalls in Linux do not use import_single_range(). However they check
// access_ok() in fs/read_write.c:vfs_read/vfs_write, and overflowing address
// ranges are truncated to MAX_RW_COUNT by fs/read_write.c:rw_verify_area().)
-func (t *Task) SingleIOSequence(addr usermem.Addr, length int, opts usermem.IOOpts) (usermem.IOSequence, error) {
+func (t *Task) SingleIOSequence(addr hostarch.Addr, length int, opts usermem.IOOpts) (usermem.IOSequence, error) {
if length > MAX_RW_COUNT {
length = MAX_RW_COUNT
}
@@ -255,7 +256,7 @@ func (t *Task) SingleIOSequence(addr usermem.Addr, length int, opts usermem.IOOp
}
return usermem.IOSequence{
IO: t.MemoryManager(),
- Addrs: usermem.AddrRangeSeqOf(ar),
+ Addrs: hostarch.AddrRangeSeqOf(ar),
Opts: opts,
}, nil
}
@@ -267,7 +268,7 @@ func (t *Task) SingleIOSequence(addr usermem.Addr, length int, opts usermem.IOOp
// IovecsIOSequence is analogous to Linux's lib/iov_iter.c:import_iovec().
//
// Preconditions: Same as Task.CopyInIovecs.
-func (t *Task) IovecsIOSequence(addr usermem.Addr, iovcnt int, opts usermem.IOOpts) (usermem.IOSequence, error) {
+func (t *Task) IovecsIOSequence(addr hostarch.Addr, iovcnt int, opts usermem.IOOpts) (usermem.IOSequence, error) {
if iovcnt < 0 || iovcnt > linux.UIO_MAXIOV {
return usermem.IOSequence{}, syserror.EINVAL
}
@@ -317,7 +318,7 @@ func (cc *taskCopyContext) getMemoryManager() (*mm.MemoryManager, error) {
}
// CopyInBytes implements marshal.CopyContext.CopyInBytes.
-func (cc *taskCopyContext) CopyInBytes(addr usermem.Addr, dst []byte) (int, error) {
+func (cc *taskCopyContext) CopyInBytes(addr hostarch.Addr, dst []byte) (int, error) {
tmm, err := cc.getMemoryManager()
if err != nil {
return 0, err
@@ -327,7 +328,7 @@ func (cc *taskCopyContext) CopyInBytes(addr usermem.Addr, dst []byte) (int, erro
}
// CopyOutBytes implements marshal.CopyContext.CopyOutBytes.
-func (cc *taskCopyContext) CopyOutBytes(addr usermem.Addr, src []byte) (int, error) {
+func (cc *taskCopyContext) CopyOutBytes(addr hostarch.Addr, src []byte) (int, error) {
tmm, err := cc.getMemoryManager()
if err != nil {
return 0, err
@@ -360,11 +361,11 @@ func (cc *ownTaskCopyContext) CopyScratchBuffer(size int) []byte {
}
// CopyInBytes implements marshal.CopyContext.CopyInBytes.
-func (cc *ownTaskCopyContext) CopyInBytes(addr usermem.Addr, dst []byte) (int, error) {
+func (cc *ownTaskCopyContext) CopyInBytes(addr hostarch.Addr, dst []byte) (int, error) {
return cc.t.MemoryManager().CopyIn(cc.t, addr, dst, cc.opts)
}
// CopyOutBytes implements marshal.CopyContext.CopyOutBytes.
-func (cc *ownTaskCopyContext) CopyOutBytes(addr usermem.Addr, src []byte) (int, error) {
+func (cc *ownTaskCopyContext) CopyOutBytes(addr hostarch.Addr, src []byte) (int, error) {
return cc.t.MemoryManager().CopyOut(cc.t, addr, src, cc.opts)
}
diff --git a/pkg/sentry/kernel/threads.go b/pkg/sentry/kernel/threads.go
index 09d070ec8..77ad62445 100644
--- a/pkg/sentry/kernel/threads.go
+++ b/pkg/sentry/kernel/threads.go
@@ -114,6 +114,15 @@ func (ts *TaskSet) forEachThreadGroupLocked(f func(tg *ThreadGroup)) {
}
}
+// forEachTaskLocked applies f to each Task in ts.
+//
+// Preconditions: ts.mu must be locked (for reading or writing).
+func (ts *TaskSet) forEachTaskLocked(f func(t *Task)) {
+ for t := range ts.Root.tids {
+ f(t)
+ }
+}
+
// A PIDNamespace represents a PID namespace, a bimap between thread IDs and
// tasks. See the pid_namespaces(7) man page for further details.
//
diff --git a/pkg/sentry/kernel/timekeeper_test.go b/pkg/sentry/kernel/timekeeper_test.go
index cf2f7ca72..dfc3c0719 100644
--- a/pkg/sentry/kernel/timekeeper_test.go
+++ b/pkg/sentry/kernel/timekeeper_test.go
@@ -17,12 +17,12 @@ package kernel
import (
"testing"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/contexttest"
"gvisor.dev/gvisor/pkg/sentry/pgalloc"
sentrytime "gvisor.dev/gvisor/pkg/sentry/time"
"gvisor.dev/gvisor/pkg/sentry/usage"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
// mockClocks is a sentrytime.Clocks that simply returns the times in the
@@ -54,7 +54,7 @@ func (c *mockClocks) GetTime(id sentrytime.ClockID) (int64, error) {
func stateTestClocklessTimekeeper(tb testing.TB) *Timekeeper {
ctx := contexttest.Context(tb)
mfp := pgalloc.MemoryFileProviderFromContext(ctx)
- fr, err := mfp.MemoryFile().Allocate(usermem.PageSize, usage.Anonymous)
+ fr, err := mfp.MemoryFile().Allocate(hostarch.PageSize, usage.Anonymous)
if err != nil {
tb.Fatalf("failed to allocate memory: %v", err)
}
diff --git a/pkg/sentry/kernel/vdso.go b/pkg/sentry/kernel/vdso.go
index 9e5c2d26f..cc0917504 100644
--- a/pkg/sentry/kernel/vdso.go
+++ b/pkg/sentry/kernel/vdso.go
@@ -17,10 +17,10 @@ package kernel
import (
"fmt"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/pgalloc"
- "gvisor.dev/gvisor/pkg/usermem"
)
// vdsoParams are the parameters exposed to the VDSO.
@@ -96,7 +96,7 @@ func NewVDSOParamPage(mfp pgalloc.MemoryFileProvider, fr memmap.FileRange) *VDSO
// access returns a mapping of the param page.
func (v *VDSOParamPage) access() (safemem.Block, error) {
- bs, err := v.mfp.MemoryFile().MapInternal(v.fr, usermem.ReadWrite)
+ bs, err := v.mfp.MemoryFile().MapInternal(v.fr, hostarch.ReadWrite)
if err != nil {
return safemem.Block{}, err
}
diff --git a/pkg/sentry/loader/BUILD b/pkg/sentry/loader/BUILD
index ab074b400..ecb6603a1 100644
--- a/pkg/sentry/loader/BUILD
+++ b/pkg/sentry/loader/BUILD
@@ -18,6 +18,7 @@ go_library(
"//pkg/binary",
"//pkg/context",
"//pkg/cpuid",
+ "//pkg/hostarch",
"//pkg/log",
"//pkg/rand",
"//pkg/safemem",
diff --git a/pkg/sentry/loader/elf.go b/pkg/sentry/loader/elf.go
index cd9fa4031..e92d9fdc3 100644
--- a/pkg/sentry/loader/elf.go
+++ b/pkg/sentry/loader/elf.go
@@ -25,6 +25,7 @@ import (
"gvisor.dev/gvisor/pkg/binary"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/cpuid"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/fsbridge"
@@ -41,7 +42,7 @@ const (
// maxTotalPhdrSize is the maximum combined size of all program
// headers. Linux limits this to one page.
- maxTotalPhdrSize = usermem.PageSize
+ maxTotalPhdrSize = hostarch.PageSize
)
var (
@@ -52,8 +53,8 @@ var (
prog64Size = int(binary.Size(elf.Prog64{}))
)
-func progFlagsAsPerms(f elf.ProgFlag) usermem.AccessType {
- var p usermem.AccessType
+func progFlagsAsPerms(f elf.ProgFlag) hostarch.AccessType {
+ var p hostarch.AccessType
if f&elf.PF_R == elf.PF_R {
p.Read = true
}
@@ -75,7 +76,7 @@ type elfInfo struct {
arch arch.Arch
// entry is the program entry point.
- entry usermem.Addr
+ entry hostarch.Addr
// phdrs are the program headers.
phdrs []elf.ProgHeader
@@ -230,7 +231,7 @@ func parseHeader(ctx context.Context, f fullReader) (elfInfo, error) {
return elfInfo{
os: os,
arch: a,
- entry: usermem.Addr(hdr.Entry),
+ entry: hostarch.Addr(hdr.Entry),
phdrs: phdrs,
phdrOff: hdr.Phoff,
phdrSize: prog64Size,
@@ -240,9 +241,9 @@ func parseHeader(ctx context.Context, f fullReader) (elfInfo, error) {
// mapSegment maps a phdr into the Task. offset is the offset to apply to
// phdr.Vaddr.
-func mapSegment(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, phdr *elf.ProgHeader, offset usermem.Addr) error {
+func mapSegment(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, phdr *elf.ProgHeader, offset hostarch.Addr) error {
// We must make a page-aligned mapping.
- adjust := usermem.Addr(phdr.Vaddr).PageOffset()
+ adjust := hostarch.Addr(phdr.Vaddr).PageOffset()
addr, ok := offset.AddLength(phdr.Vaddr)
if !ok {
@@ -250,14 +251,14 @@ func mapSegment(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, phdr
ctx.Warningf("Computed segment load address overflows: %#x + %#x", phdr.Vaddr, offset)
return syserror.ENOEXEC
}
- addr -= usermem.Addr(adjust)
+ addr -= hostarch.Addr(adjust)
fileSize := phdr.Filesz + adjust
if fileSize < phdr.Filesz {
ctx.Infof("Computed segment file size overflows: %#x + %#x", phdr.Filesz, adjust)
return syserror.ENOEXEC
}
- ms, ok := usermem.Addr(fileSize).RoundUp()
+ ms, ok := hostarch.Addr(fileSize).RoundUp()
if !ok {
ctx.Infof("fileSize %#x too large", fileSize)
return syserror.ENOEXEC
@@ -281,7 +282,7 @@ func mapSegment(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, phdr
Unmap: true,
Private: true,
Perms: prot,
- MaxPerms: usermem.AnyAccess,
+ MaxPerms: hostarch.AnyAccess,
}
defer func() {
if mopts.MappingIdentity != nil {
@@ -312,7 +313,7 @@ func mapSegment(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, phdr
panic(fmt.Sprintf("zeroSize too big? %#x", uint64(zeroSize)))
}
if _, err := m.ZeroOut(ctx, zeroAddr, zeroSize, usermem.IOOpts{IgnorePermissions: true}); err != nil {
- ctx.Warningf("Failed to zero end of page [%#x, %#x): %v", zeroAddr, zeroAddr+usermem.Addr(zeroSize), err)
+ ctx.Warningf("Failed to zero end of page [%#x, %#x): %v", zeroAddr, zeroAddr+hostarch.Addr(zeroSize), err)
return err
}
}
@@ -330,7 +331,7 @@ func mapSegment(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, phdr
if !ok {
panic(fmt.Sprintf("anonymous memory doesn't fit in pre-sized range? %#x + %#x", addr, mapSize))
}
- anonSize, ok := usermem.Addr(memSize - mapSize).RoundUp()
+ anonSize, ok := hostarch.Addr(memSize - mapSize).RoundUp()
if !ok {
ctx.Infof("extra anon pages too large: %#x", memSize-mapSize)
return syserror.ENOEXEC
@@ -339,7 +340,7 @@ func mapSegment(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, phdr
// N.B. Linux uses vm_brk_flags to map these pages, which only
// honors the X bit, always mapping at least RW. ignoring These
// pages are not included in the final brk region.
- prot := usermem.ReadWrite
+ prot := hostarch.ReadWrite
if phdr.Flags&elf.PF_X == elf.PF_X {
prot.Execute = true
}
@@ -352,7 +353,7 @@ func mapSegment(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, phdr
Fixed: true,
Private: true,
Perms: prot,
- MaxPerms: usermem.AnyAccess,
+ MaxPerms: hostarch.AnyAccess,
}); err != nil {
ctx.Infof("Error mapping PT_LOAD segment %v anonymous memory: %v", phdr, err)
return err
@@ -371,19 +372,19 @@ type loadedELF struct {
arch arch.Arch
// entry is the entry point of the ELF.
- entry usermem.Addr
+ entry hostarch.Addr
// start is the end of the ELF.
- start usermem.Addr
+ start hostarch.Addr
// end is the end of the ELF.
- end usermem.Addr
+ end hostarch.Addr
// interpter is the path to the ELF interpreter.
interpreter string
// phdrAddr is the address of the ELF program headers.
- phdrAddr usermem.Addr
+ phdrAddr hostarch.Addr
// phdrSize is the size of a single program header in the ELF.
phdrSize int
@@ -407,14 +408,14 @@ type loadedELF struct {
// It does not load the ELF interpreter, or return any auxv entries.
//
// Preconditions: f is an ELF file.
-func loadParsedELF(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, info elfInfo, sharedLoadOffset usermem.Addr) (loadedELF, error) {
+func loadParsedELF(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, info elfInfo, sharedLoadOffset hostarch.Addr) (loadedELF, error) {
first := true
- var start, end usermem.Addr
+ var start, end hostarch.Addr
var interpreter string
for _, phdr := range info.phdrs {
switch phdr.Type {
case elf.PT_LOAD:
- vaddr := usermem.Addr(phdr.Vaddr)
+ vaddr := hostarch.Addr(phdr.Vaddr)
if first {
first = false
start = vaddr
@@ -492,7 +493,7 @@ func loadParsedELF(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, in
// Note that the vaddr of the first PT_LOAD segment is ignored when
// choosing the load address (even if it is non-zero). The vaddr does
// become an offset from that load address.
- var offset usermem.Addr
+ var offset hostarch.Addr
if info.sharedObject {
totalSize := end - start
totalSize, ok := totalSize.RoundUp()
@@ -688,8 +689,8 @@ func loadELF(ctx context.Context, args LoadArgs) (loadedELF, arch.Context, error
// ELF-specific auxv entries.
bin.auxv = arch.Auxv{
arch.AuxEntry{linux.AT_PHDR, bin.phdrAddr},
- arch.AuxEntry{linux.AT_PHENT, usermem.Addr(bin.phdrSize)},
- arch.AuxEntry{linux.AT_PHNUM, usermem.Addr(bin.phdrNum)},
+ arch.AuxEntry{linux.AT_PHENT, hostarch.Addr(bin.phdrSize)},
+ arch.AuxEntry{linux.AT_PHNUM, hostarch.Addr(bin.phdrNum)},
arch.AuxEntry{linux.AT_ENTRY, bin.entry},
}
if bin.interpreter != "" {
diff --git a/pkg/sentry/loader/loader.go b/pkg/sentry/loader/loader.go
index c69b62db9..47e3775a3 100644
--- a/pkg/sentry/loader/loader.go
+++ b/pkg/sentry/loader/loader.go
@@ -25,6 +25,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/cpuid"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/rand"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/fsbridge"
@@ -266,17 +267,17 @@ func Load(ctx context.Context, args LoadArgs, extraAuxv []arch.AuxEntry, vdso *V
// Add generic auxv entries.
auxv := append(loaded.auxv, arch.Auxv{
- arch.AuxEntry{linux.AT_UID, usermem.Addr(c.RealKUID.In(c.UserNamespace).OrOverflow())},
- arch.AuxEntry{linux.AT_EUID, usermem.Addr(c.EffectiveKUID.In(c.UserNamespace).OrOverflow())},
- arch.AuxEntry{linux.AT_GID, usermem.Addr(c.RealKGID.In(c.UserNamespace).OrOverflow())},
- arch.AuxEntry{linux.AT_EGID, usermem.Addr(c.EffectiveKGID.In(c.UserNamespace).OrOverflow())},
+ arch.AuxEntry{linux.AT_UID, hostarch.Addr(c.RealKUID.In(c.UserNamespace).OrOverflow())},
+ arch.AuxEntry{linux.AT_EUID, hostarch.Addr(c.EffectiveKUID.In(c.UserNamespace).OrOverflow())},
+ arch.AuxEntry{linux.AT_GID, hostarch.Addr(c.RealKGID.In(c.UserNamespace).OrOverflow())},
+ arch.AuxEntry{linux.AT_EGID, hostarch.Addr(c.EffectiveKGID.In(c.UserNamespace).OrOverflow())},
// The conditions that require AT_SECURE = 1 never arise. See
// kernel.Task.updateCredsForExecLocked.
arch.AuxEntry{linux.AT_SECURE, 0},
arch.AuxEntry{linux.AT_CLKTCK, linux.CLOCKS_PER_SEC},
arch.AuxEntry{linux.AT_EXECFN, execfn},
arch.AuxEntry{linux.AT_RANDOM, random},
- arch.AuxEntry{linux.AT_PAGESZ, usermem.PageSize},
+ arch.AuxEntry{linux.AT_PAGESZ, hostarch.PageSize},
arch.AuxEntry{linux.AT_SYSINFO_EHDR, vdsoAddr},
}...)
auxv = append(auxv, extraAuxv...)
diff --git a/pkg/sentry/loader/vdso.go b/pkg/sentry/loader/vdso.go
index a32d37d62..fd54261fd 100644
--- a/pkg/sentry/loader/vdso.go
+++ b/pkg/sentry/loader/vdso.go
@@ -23,6 +23,7 @@ import (
"gvisor.dev/gvisor/pkg/abi"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/arch"
@@ -90,7 +91,7 @@ func validateVDSO(ctx context.Context, f fullReader, size uint64) (elfInfo, erro
var first *elf.ProgHeader
var prev *elf.ProgHeader
- var prevEnd usermem.Addr
+ var prevEnd hostarch.Addr
for i, phdr := range info.phdrs {
if phdr.Type != elf.PT_LOAD {
continue
@@ -119,7 +120,7 @@ func validateVDSO(ctx context.Context, f fullReader, size uint64) (elfInfo, erro
return elfInfo{}, syserror.ENOEXEC
}
- start := usermem.Addr(memoryOffset)
+ start := hostarch.Addr(memoryOffset)
end, ok := start.AddLength(phdr.Memsz)
if !ok {
log.Warningf("PT_LOAD segment size overflows: %#x + %#x", start, end)
@@ -210,7 +211,7 @@ func PrepareVDSO(mfp pgalloc.MemoryFileProvider) (*VDSO, error) {
}
// Then copy it into a VDSO mapping.
- size, ok := usermem.Addr(len(vdsodata.Binary)).RoundUp()
+ size, ok := hostarch.Addr(len(vdsodata.Binary)).RoundUp()
if !ok {
return nil, fmt.Errorf("VDSO size overflows? %#x", len(vdsodata.Binary))
}
@@ -221,7 +222,7 @@ func PrepareVDSO(mfp pgalloc.MemoryFileProvider) (*VDSO, error) {
return nil, fmt.Errorf("unable to allocate VDSO memory: %v", err)
}
- ims, err := mf.MapInternal(vdso, usermem.ReadWrite)
+ ims, err := mf.MapInternal(vdso, hostarch.ReadWrite)
if err != nil {
mf.DecRef(vdso)
return nil, fmt.Errorf("unable to map VDSO memory: %v", err)
@@ -234,7 +235,7 @@ func PrepareVDSO(mfp pgalloc.MemoryFileProvider) (*VDSO, error) {
}
// Finally, allocate a param page for this VDSO.
- paramPage, err := mf.Allocate(usermem.PageSize, usage.System)
+ paramPage, err := mf.Allocate(hostarch.PageSize, usage.System)
if err != nil {
mf.DecRef(vdso)
return nil, fmt.Errorf("unable to allocate VDSO param page: %v", err)
@@ -266,7 +267,7 @@ func PrepareVDSO(mfp pgalloc.MemoryFileProvider) (*VDSO, error) {
// compatibility with such binaries, we load the VDSO much like Linux.
//
// loadVDSO takes a reference on the VDSO and parameter page FrameRegions.
-func loadVDSO(ctx context.Context, m *mm.MemoryManager, v *VDSO, bin loadedELF) (usermem.Addr, error) {
+func loadVDSO(ctx context.Context, m *mm.MemoryManager, v *VDSO, bin loadedELF) (hostarch.Addr, error) {
if v.os != bin.os {
ctx.Warningf("Binary ELF OS %v and VDSO ELF OS %v differ", bin.os, v.os)
return 0, syserror.ENOEXEC
@@ -297,8 +298,8 @@ func loadVDSO(ctx context.Context, m *mm.MemoryManager, v *VDSO, bin loadedELF)
Fixed: true,
Unmap: true,
Private: true,
- Perms: usermem.Read,
- MaxPerms: usermem.Read,
+ Perms: hostarch.Read,
+ MaxPerms: hostarch.Read,
})
if err != nil {
ctx.Infof("Unable to map VDSO param page: %v", err)
@@ -318,8 +319,8 @@ func loadVDSO(ctx context.Context, m *mm.MemoryManager, v *VDSO, bin loadedELF)
Fixed: true,
Unmap: true,
Private: true,
- Perms: usermem.Read,
- MaxPerms: usermem.AnyAccess,
+ Perms: hostarch.Read,
+ MaxPerms: hostarch.AnyAccess,
})
if err != nil {
ctx.Infof("Unable to map VDSO: %v", err)
@@ -349,7 +350,7 @@ func loadVDSO(ctx context.Context, m *mm.MemoryManager, v *VDSO, bin loadedELF)
return 0, syserror.ENOEXEC
}
segPage := segAddr.RoundDown()
- segSize := usermem.Addr(phdr.Memsz)
+ segSize := hostarch.Addr(phdr.Memsz)
segSize, ok = segSize.AddLength(segAddr.PageOffset())
if !ok {
ctx.Warningf("PT_LOAD segment memsize %#x + offset %#x overflows", phdr.Memsz, segAddr.PageOffset())
@@ -371,7 +372,7 @@ func loadVDSO(ctx context.Context, m *mm.MemoryManager, v *VDSO, bin loadedELF)
}
perms := progFlagsAsPerms(phdr.Flags)
- if perms != usermem.Read {
+ if perms != hostarch.Read {
if err := m.MProtect(segPage, uint64(segSize), perms, false); err != nil {
ctx.Warningf("Unable to set PT_LOAD segment protections %+v at [%#x, %#x): %v", perms, segAddr, segEnd, err)
return 0, syserror.ENOEXEC
diff --git a/pkg/sentry/memmap/BUILD b/pkg/sentry/memmap/BUILD
index 2c95669cd..c30e88725 100644
--- a/pkg/sentry/memmap/BUILD
+++ b/pkg/sentry/memmap/BUILD
@@ -51,6 +51,7 @@ go_library(
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/context",
+ "//pkg/hostarch",
"//pkg/log",
"//pkg/safemem",
"//pkg/syserror",
@@ -63,5 +64,5 @@ go_test(
size = "small",
srcs = ["mapping_set_test.go"],
library = ":memmap",
- deps = ["//pkg/usermem"],
+ deps = ["//pkg/hostarch"],
)
diff --git a/pkg/sentry/memmap/mapping_set.go b/pkg/sentry/memmap/mapping_set.go
index 457ed87f8..32863bb5e 100644
--- a/pkg/sentry/memmap/mapping_set.go
+++ b/pkg/sentry/memmap/mapping_set.go
@@ -18,7 +18,7 @@ import (
"fmt"
"math"
- "gvisor.dev/gvisor/pkg/usermem"
+ "gvisor.dev/gvisor/pkg/hostarch"
)
// MappingSet maps offsets into a Mappable to mappings of those offsets. It is
@@ -39,7 +39,7 @@ type MappingsOfRange map[MappingOfRange]struct{}
// +stateify savable
type MappingOfRange struct {
MappingSpace MappingSpace
- AddrRange usermem.AddrRange
+ AddrRange hostarch.AddrRange
Writable bool
}
@@ -89,9 +89,9 @@ func (mappingSetFunctions) Merge(r1 MappableRange, val1 MappingsOfRange, r2 Mapp
// region with k1.
k2 := MappingOfRange{
MappingSpace: k1.MappingSpace,
- AddrRange: usermem.AddrRange{
+ AddrRange: hostarch.AddrRange{
Start: k1.AddrRange.End,
- End: k1.AddrRange.End + usermem.Addr(r2.Length()),
+ End: k1.AddrRange.End + hostarch.Addr(r2.Length()),
},
Writable: k1.Writable,
}
@@ -102,7 +102,7 @@ func (mappingSetFunctions) Merge(r1 MappableRange, val1 MappingsOfRange, r2 Mapp
// OK. Add it to the merged map.
merged[MappingOfRange{
MappingSpace: k1.MappingSpace,
- AddrRange: usermem.AddrRange{
+ AddrRange: hostarch.AddrRange{
Start: k1.AddrRange.Start,
End: k2.AddrRange.End,
},
@@ -124,11 +124,11 @@ func (mappingSetFunctions) Split(r MappableRange, val MappingsOfRange, split uin
// split is a value in MappableRange, we need the offset into the
// corresponding MappingsOfRange.
- offset := usermem.Addr(split - r.Start)
+ offset := hostarch.Addr(split - r.Start)
for k := range val {
k1 := MappingOfRange{
MappingSpace: k.MappingSpace,
- AddrRange: usermem.AddrRange{
+ AddrRange: hostarch.AddrRange{
Start: k.AddrRange.Start,
End: k.AddrRange.Start + offset,
},
@@ -138,7 +138,7 @@ func (mappingSetFunctions) Split(r MappableRange, val MappingsOfRange, split uin
k2 := MappingOfRange{
MappingSpace: k.MappingSpace,
- AddrRange: usermem.AddrRange{
+ AddrRange: hostarch.AddrRange{
Start: k.AddrRange.Start + offset,
End: k.AddrRange.End,
},
@@ -157,18 +157,18 @@ func (mappingSetFunctions) Split(r MappableRange, val MappingsOfRange, split uin
// indicating that ms maps addresses [0x4000, 0x6000) to MappableRange [0x0,
// 0x2000). Then for subsetRange = [0x1000, 0x2000), subsetMapping returns a
// MappingOfRange for which AddrRange = [0x5000, 0x6000).
-func subsetMapping(wholeRange, subsetRange MappableRange, ms MappingSpace, addr usermem.Addr, writable bool) MappingOfRange {
+func subsetMapping(wholeRange, subsetRange MappableRange, ms MappingSpace, addr hostarch.Addr, writable bool) MappingOfRange {
if !wholeRange.IsSupersetOf(subsetRange) {
panic(fmt.Sprintf("%v is not a superset of %v", wholeRange, subsetRange))
}
offset := subsetRange.Start - wholeRange.Start
- start := addr + usermem.Addr(offset)
+ start := addr + hostarch.Addr(offset)
return MappingOfRange{
MappingSpace: ms,
- AddrRange: usermem.AddrRange{
+ AddrRange: hostarch.AddrRange{
Start: start,
- End: start + usermem.Addr(subsetRange.Length()),
+ End: start + hostarch.Addr(subsetRange.Length()),
},
Writable: writable,
}
@@ -178,7 +178,7 @@ func subsetMapping(wholeRange, subsetRange MappableRange, ms MappingSpace, addr
// previously had no mappings.
//
// Preconditions: Same as Mappable.AddMapping.
-func (s *MappingSet) AddMapping(ms MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) []MappableRange {
+func (s *MappingSet) AddMapping(ms MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) []MappableRange {
mr := MappableRange{offset, offset + uint64(ar.Length())}
var mapped []MappableRange
seg, gap := s.Find(mr.Start)
@@ -205,7 +205,7 @@ func (s *MappingSet) AddMapping(ms MappingSpace, ar usermem.AddrRange, offset ui
// MappableRanges that now have no mappings.
//
// Preconditions: Same as Mappable.RemoveMapping.
-func (s *MappingSet) RemoveMapping(ms MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) []MappableRange {
+func (s *MappingSet) RemoveMapping(ms MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) []MappableRange {
mr := MappableRange{offset, offset + uint64(ar.Length())}
var unmapped []MappableRange
diff --git a/pkg/sentry/memmap/mapping_set_test.go b/pkg/sentry/memmap/mapping_set_test.go
index d39efe38f..5cb81fde7 100644
--- a/pkg/sentry/memmap/mapping_set_test.go
+++ b/pkg/sentry/memmap/mapping_set_test.go
@@ -15,24 +15,23 @@
package memmap
import (
+ "gvisor.dev/gvisor/pkg/hostarch"
"reflect"
"testing"
-
- "gvisor.dev/gvisor/pkg/usermem"
)
type testMappingSpace struct {
// Ideally we'd store the full ranges that were invalidated, rather
// than individual calls to Invalidate, as they are an implementation
// detail, but this is the simplest way for now.
- inv []usermem.AddrRange
+ inv []hostarch.AddrRange
}
func (n *testMappingSpace) reset() {
- n.inv = []usermem.AddrRange{}
+ n.inv = []hostarch.AddrRange{}
}
-func (n *testMappingSpace) Invalidate(ar usermem.AddrRange, opts InvalidateOpts) {
+func (n *testMappingSpace) Invalidate(ar hostarch.AddrRange, opts InvalidateOpts) {
n.inv = append(n.inv, ar)
}
@@ -40,16 +39,16 @@ func TestAddRemoveMapping(t *testing.T) {
set := MappingSet{}
ms := &testMappingSpace{}
- mapped := set.AddMapping(ms, usermem.AddrRange{0x10000, 0x12000}, 0x1000, true)
+ mapped := set.AddMapping(ms, hostarch.AddrRange{0x10000, 0x12000}, 0x1000, true)
if got, want := mapped, []MappableRange{{0x1000, 0x3000}}; !reflect.DeepEqual(got, want) {
t.Errorf("AddMapping: got %+v, wanted %+v", got, want)
}
- // Mappings (usermem.AddrRanges => memmap.MappableRange):
+ // Mappings (hostarch.AddrRanges => memmap.MappableRange):
// [0x10000, 0x12000) => [0x1000, 0x3000)
t.Log(&set)
- mapped = set.AddMapping(ms, usermem.AddrRange{0x20000, 0x21000}, 0x2000, true)
+ mapped = set.AddMapping(ms, hostarch.AddrRange{0x20000, 0x21000}, 0x2000, true)
if len(mapped) != 0 {
t.Errorf("AddMapping: got %+v, wanted []", mapped)
}
@@ -59,7 +58,7 @@ func TestAddRemoveMapping(t *testing.T) {
// [0x11000, 0x12000) and [0x20000, 0x21000) => [0x2000, 0x3000)
t.Log(&set)
- mapped = set.AddMapping(ms, usermem.AddrRange{0x30000, 0x31000}, 0x4000, true)
+ mapped = set.AddMapping(ms, hostarch.AddrRange{0x30000, 0x31000}, 0x4000, true)
if got, want := mapped, []MappableRange{{0x4000, 0x5000}}; !reflect.DeepEqual(got, want) {
t.Errorf("AddMapping: got %+v, wanted %+v", got, want)
}
@@ -70,7 +69,7 @@ func TestAddRemoveMapping(t *testing.T) {
// [0x30000, 0x31000) => [0x4000, 0x5000)
t.Log(&set)
- mapped = set.AddMapping(ms, usermem.AddrRange{0x12000, 0x15000}, 0x3000, true)
+ mapped = set.AddMapping(ms, hostarch.AddrRange{0x12000, 0x15000}, 0x3000, true)
if got, want := mapped, []MappableRange{{0x3000, 0x4000}, {0x5000, 0x6000}}; !reflect.DeepEqual(got, want) {
t.Errorf("AddMapping: got %+v, wanted %+v", got, want)
}
@@ -83,7 +82,7 @@ func TestAddRemoveMapping(t *testing.T) {
// [0x14000, 0x15000) => [0x5000, 0x6000)
t.Log(&set)
- unmapped := set.RemoveMapping(ms, usermem.AddrRange{0x10000, 0x11000}, 0x1000, true)
+ unmapped := set.RemoveMapping(ms, hostarch.AddrRange{0x10000, 0x11000}, 0x1000, true)
if got, want := unmapped, []MappableRange{{0x1000, 0x2000}}; !reflect.DeepEqual(got, want) {
t.Errorf("RemoveMapping: got %+v, wanted %+v", got, want)
}
@@ -95,7 +94,7 @@ func TestAddRemoveMapping(t *testing.T) {
// [0x14000, 0x15000) => [0x5000, 0x6000)
t.Log(&set)
- unmapped = set.RemoveMapping(ms, usermem.AddrRange{0x20000, 0x21000}, 0x2000, true)
+ unmapped = set.RemoveMapping(ms, hostarch.AddrRange{0x20000, 0x21000}, 0x2000, true)
if len(unmapped) != 0 {
t.Errorf("RemoveMapping: got %+v, wanted []", unmapped)
}
@@ -106,7 +105,7 @@ func TestAddRemoveMapping(t *testing.T) {
// [0x14000, 0x15000) => [0x5000, 0x6000)
t.Log(&set)
- unmapped = set.RemoveMapping(ms, usermem.AddrRange{0x11000, 0x15000}, 0x2000, true)
+ unmapped = set.RemoveMapping(ms, hostarch.AddrRange{0x11000, 0x15000}, 0x2000, true)
if got, want := unmapped, []MappableRange{{0x2000, 0x4000}, {0x5000, 0x6000}}; !reflect.DeepEqual(got, want) {
t.Errorf("RemoveMapping: got %+v, wanted %+v", got, want)
}
@@ -115,7 +114,7 @@ func TestAddRemoveMapping(t *testing.T) {
// [0x30000, 0x31000) => [0x4000, 0x5000)
t.Log(&set)
- unmapped = set.RemoveMapping(ms, usermem.AddrRange{0x30000, 0x31000}, 0x4000, true)
+ unmapped = set.RemoveMapping(ms, hostarch.AddrRange{0x30000, 0x31000}, 0x4000, true)
if got, want := unmapped, []MappableRange{{0x4000, 0x5000}}; !reflect.DeepEqual(got, want) {
t.Errorf("RemoveMapping: got %+v, wanted %+v", got, want)
}
@@ -125,12 +124,12 @@ func TestInvalidateWholeMapping(t *testing.T) {
set := MappingSet{}
ms := &testMappingSpace{}
- set.AddMapping(ms, usermem.AddrRange{0x10000, 0x11000}, 0, true)
+ set.AddMapping(ms, hostarch.AddrRange{0x10000, 0x11000}, 0, true)
// Mappings:
// [0x10000, 0x11000) => [0, 0x1000)
t.Log(&set)
set.Invalidate(MappableRange{0, 0x1000}, InvalidateOpts{})
- if got, want := ms.inv, []usermem.AddrRange{{0x10000, 0x11000}}; !reflect.DeepEqual(got, want) {
+ if got, want := ms.inv, []hostarch.AddrRange{{0x10000, 0x11000}}; !reflect.DeepEqual(got, want) {
t.Errorf("Invalidate: got %+v, wanted %+v", got, want)
}
}
@@ -139,12 +138,12 @@ func TestInvalidatePartialMapping(t *testing.T) {
set := MappingSet{}
ms := &testMappingSpace{}
- set.AddMapping(ms, usermem.AddrRange{0x10000, 0x13000}, 0, true)
+ set.AddMapping(ms, hostarch.AddrRange{0x10000, 0x13000}, 0, true)
// Mappings:
// [0x10000, 0x13000) => [0, 0x3000)
t.Log(&set)
set.Invalidate(MappableRange{0x1000, 0x2000}, InvalidateOpts{})
- if got, want := ms.inv, []usermem.AddrRange{{0x11000, 0x12000}}; !reflect.DeepEqual(got, want) {
+ if got, want := ms.inv, []hostarch.AddrRange{{0x11000, 0x12000}}; !reflect.DeepEqual(got, want) {
t.Errorf("Invalidate: got %+v, wanted %+v", got, want)
}
}
@@ -153,14 +152,14 @@ func TestInvalidateMultipleMappings(t *testing.T) {
set := MappingSet{}
ms := &testMappingSpace{}
- set.AddMapping(ms, usermem.AddrRange{0x10000, 0x11000}, 0, true)
- set.AddMapping(ms, usermem.AddrRange{0x20000, 0x21000}, 0x2000, true)
+ set.AddMapping(ms, hostarch.AddrRange{0x10000, 0x11000}, 0, true)
+ set.AddMapping(ms, hostarch.AddrRange{0x20000, 0x21000}, 0x2000, true)
// Mappings:
// [0x10000, 0x11000) => [0, 0x1000)
// [0x12000, 0x13000) => [0x2000, 0x3000)
t.Log(&set)
set.Invalidate(MappableRange{0, 0x3000}, InvalidateOpts{})
- if got, want := ms.inv, []usermem.AddrRange{{0x10000, 0x11000}, {0x20000, 0x21000}}; !reflect.DeepEqual(got, want) {
+ if got, want := ms.inv, []hostarch.AddrRange{{0x10000, 0x11000}, {0x20000, 0x21000}}; !reflect.DeepEqual(got, want) {
t.Errorf("Invalidate: got %+v, wanted %+v", got, want)
}
}
@@ -170,17 +169,17 @@ func TestInvalidateOverlappingMappings(t *testing.T) {
ms1 := &testMappingSpace{}
ms2 := &testMappingSpace{}
- set.AddMapping(ms1, usermem.AddrRange{0x10000, 0x12000}, 0, true)
- set.AddMapping(ms2, usermem.AddrRange{0x20000, 0x22000}, 0x1000, true)
+ set.AddMapping(ms1, hostarch.AddrRange{0x10000, 0x12000}, 0, true)
+ set.AddMapping(ms2, hostarch.AddrRange{0x20000, 0x22000}, 0x1000, true)
// Mappings:
// ms1:[0x10000, 0x12000) => [0, 0x2000)
// ms2:[0x11000, 0x13000) => [0x1000, 0x3000)
t.Log(&set)
set.Invalidate(MappableRange{0x1000, 0x2000}, InvalidateOpts{})
- if got, want := ms1.inv, []usermem.AddrRange{{0x11000, 0x12000}}; !reflect.DeepEqual(got, want) {
+ if got, want := ms1.inv, []hostarch.AddrRange{{0x11000, 0x12000}}; !reflect.DeepEqual(got, want) {
t.Errorf("Invalidate: ms1: got %+v, wanted %+v", got, want)
}
- if got, want := ms2.inv, []usermem.AddrRange{{0x20000, 0x21000}}; !reflect.DeepEqual(got, want) {
+ if got, want := ms2.inv, []hostarch.AddrRange{{0x20000, 0x21000}}; !reflect.DeepEqual(got, want) {
t.Errorf("Invalidate: ms1: got %+v, wanted %+v", got, want)
}
}
@@ -189,7 +188,7 @@ func TestMixedWritableMappings(t *testing.T) {
set := MappingSet{}
ms := &testMappingSpace{}
- mapped := set.AddMapping(ms, usermem.AddrRange{0x10000, 0x12000}, 0x1000, true)
+ mapped := set.AddMapping(ms, hostarch.AddrRange{0x10000, 0x12000}, 0x1000, true)
if got, want := mapped, []MappableRange{{0x1000, 0x3000}}; !reflect.DeepEqual(got, want) {
t.Errorf("AddMapping: got %+v, wanted %+v", got, want)
}
@@ -198,7 +197,7 @@ func TestMixedWritableMappings(t *testing.T) {
// [0x10000, 0x12000) writable => [0x1000, 0x3000)
t.Log(&set)
- mapped = set.AddMapping(ms, usermem.AddrRange{0x20000, 0x22000}, 0x2000, false)
+ mapped = set.AddMapping(ms, hostarch.AddrRange{0x20000, 0x22000}, 0x2000, false)
if got, want := mapped, []MappableRange{{0x3000, 0x4000}}; !reflect.DeepEqual(got, want) {
t.Errorf("AddMapping: got %+v, wanted %+v", got, want)
}
@@ -211,14 +210,14 @@ func TestMixedWritableMappings(t *testing.T) {
// Unmap should fail because we specified the readonly map address range, but
// asked to unmap a writable segment.
- unmapped := set.RemoveMapping(ms, usermem.AddrRange{0x20000, 0x21000}, 0x2000, true)
+ unmapped := set.RemoveMapping(ms, hostarch.AddrRange{0x20000, 0x21000}, 0x2000, true)
if len(unmapped) != 0 {
t.Errorf("RemoveMapping: got %+v, wanted []", unmapped)
}
// Readonly mapping removed, but writable mapping still exists in the range,
// so no mappable range fully unmapped.
- unmapped = set.RemoveMapping(ms, usermem.AddrRange{0x20000, 0x21000}, 0x2000, false)
+ unmapped = set.RemoveMapping(ms, hostarch.AddrRange{0x20000, 0x21000}, 0x2000, false)
if len(unmapped) != 0 {
t.Errorf("RemoveMapping: got %+v, wanted []", unmapped)
}
@@ -228,7 +227,7 @@ func TestMixedWritableMappings(t *testing.T) {
// [0x21000, 0x22000) readonly => [0x3000, 0x4000)
t.Log(&set)
- unmapped = set.RemoveMapping(ms, usermem.AddrRange{0x11000, 0x12000}, 0x2000, true)
+ unmapped = set.RemoveMapping(ms, hostarch.AddrRange{0x11000, 0x12000}, 0x2000, true)
if got, want := unmapped, []MappableRange{{0x2000, 0x3000}}; !reflect.DeepEqual(got, want) {
t.Errorf("RemoveMapping: got %+v, wanted %+v", got, want)
}
@@ -239,12 +238,12 @@ func TestMixedWritableMappings(t *testing.T) {
t.Log(&set)
// Unmap should fail since writable bit doesn't match.
- unmapped = set.RemoveMapping(ms, usermem.AddrRange{0x10000, 0x12000}, 0x1000, false)
+ unmapped = set.RemoveMapping(ms, hostarch.AddrRange{0x10000, 0x12000}, 0x1000, false)
if len(unmapped) != 0 {
t.Errorf("RemoveMapping: got %+v, wanted []", unmapped)
}
- unmapped = set.RemoveMapping(ms, usermem.AddrRange{0x10000, 0x12000}, 0x1000, true)
+ unmapped = set.RemoveMapping(ms, hostarch.AddrRange{0x10000, 0x12000}, 0x1000, true)
if got, want := unmapped, []MappableRange{{0x1000, 0x2000}}; !reflect.DeepEqual(got, want) {
t.Errorf("RemoveMapping: got %+v, wanted %+v", got, want)
}
@@ -253,7 +252,7 @@ func TestMixedWritableMappings(t *testing.T) {
// [0x21000, 0x22000) readonly => [0x3000, 0x4000)
t.Log(&set)
- unmapped = set.RemoveMapping(ms, usermem.AddrRange{0x21000, 0x22000}, 0x3000, false)
+ unmapped = set.RemoveMapping(ms, hostarch.AddrRange{0x21000, 0x22000}, 0x3000, false)
if got, want := unmapped, []MappableRange{{0x3000, 0x4000}}; !reflect.DeepEqual(got, want) {
t.Errorf("RemoveMapping: got %+v, wanted %+v", got, want)
}
diff --git a/pkg/sentry/memmap/memmap.go b/pkg/sentry/memmap/memmap.go
index 49e21026e..610686ea0 100644
--- a/pkg/sentry/memmap/memmap.go
+++ b/pkg/sentry/memmap/memmap.go
@@ -19,8 +19,8 @@ import (
"fmt"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/safemem"
- "gvisor.dev/gvisor/pkg/usermem"
)
// Mappable represents a memory-mappable object, a mutable mapping from uint64
@@ -29,8 +29,8 @@ import (
// See mm/mm.go for Mappable's place in the lock order.
//
// All Mappable methods have the following preconditions:
-// * usermem.AddrRanges and MappableRanges must be non-empty (Length() != 0).
-// * usermem.Addrs and Mappable offsets must be page-aligned.
+// * hostarch.AddrRanges and MappableRanges must be non-empty (Length() != 0).
+// * hostarch.Addrs and Mappable offsets must be page-aligned.
type Mappable interface {
// AddMapping notifies the Mappable of a mapping from addresses ar in ms to
// offsets [offset, offset+ar.Length()) in this Mappable.
@@ -42,7 +42,7 @@ type Mappable interface {
// lifetime of the mapping.
//
// Preconditions: offset+ar.Length() does not overflow.
- AddMapping(ctx context.Context, ms MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) error
+ AddMapping(ctx context.Context, ms MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error
// RemoveMapping notifies the Mappable of the removal of a mapping from
// addresses ar in ms to offsets [offset, offset+ar.Length()) in this
@@ -52,7 +52,7 @@ type Mappable interface {
// * offset+ar.Length() does not overflow.
// * The removed mapping must exist. writable must match the
// corresponding call to AddMapping.
- RemoveMapping(ctx context.Context, ms MappingSpace, ar usermem.AddrRange, offset uint64, writable bool)
+ RemoveMapping(ctx context.Context, ms MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool)
// CopyMapping notifies the Mappable of an attempt to copy a mapping in ms
// from srcAR to dstAR. For most Mappables, this is equivalent to
@@ -66,7 +66,7 @@ type Mappable interface {
// * offset+srcAR.Length() and offset+dstAR.Length() do not overflow.
// * The mapping at srcAR must exist. writable must match the
// corresponding call to AddMapping.
- CopyMapping(ctx context.Context, ms MappingSpace, srcAR, dstAR usermem.AddrRange, offset uint64, writable bool) error
+ CopyMapping(ctx context.Context, ms MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error
// Translate returns the Mappable's current mappings for at least the range
// of offsets specified by required, and at most the range of offsets
@@ -90,7 +90,7 @@ type Mappable interface {
// synchronize with invalidation.
//
// Postconditions: See CheckTranslateResult.
- Translate(ctx context.Context, required, optional MappableRange, at usermem.AccessType) ([]Translation, error)
+ Translate(ctx context.Context, required, optional MappableRange, at hostarch.AccessType) ([]Translation, error)
// InvalidateUnsavable requests that the Mappable invalidate Translations
// that cannot be preserved across save/restore.
@@ -113,7 +113,7 @@ type Translation struct {
// Perms is the set of permissions for which platform.AddressSpace.MapFile
// and platform.AddressSpace.MapInternal on this Translation is permitted.
- Perms usermem.AccessType
+ Perms hostarch.AccessType
}
// FileRange returns the FileRange represented by t.
@@ -125,18 +125,18 @@ func (t Translation) FileRange() FileRange {
// postconditions for Mappable.Translate(required, optional, at).
//
// Preconditions: Same as Mappable.Translate.
-func CheckTranslateResult(required, optional MappableRange, at usermem.AccessType, ts []Translation, terr error) error {
+func CheckTranslateResult(required, optional MappableRange, at hostarch.AccessType, ts []Translation, terr error) error {
// Verify that the inputs to Mappable.Translate were valid.
if !required.WellFormed() || required.Length() == 0 {
panic(fmt.Sprintf("invalid required range: %v", required))
}
- if !usermem.Addr(required.Start).IsPageAligned() || !usermem.Addr(required.End).IsPageAligned() {
+ if !hostarch.Addr(required.Start).IsPageAligned() || !hostarch.Addr(required.End).IsPageAligned() {
panic(fmt.Sprintf("unaligned required range: %v", required))
}
if !optional.IsSupersetOf(required) {
panic(fmt.Sprintf("optional range %v is not a superset of required range %v", optional, required))
}
- if !usermem.Addr(optional.Start).IsPageAligned() || !usermem.Addr(optional.End).IsPageAligned() {
+ if !hostarch.Addr(optional.Start).IsPageAligned() || !hostarch.Addr(optional.End).IsPageAligned() {
panic(fmt.Sprintf("unaligned optional range: %v", optional))
}
@@ -148,13 +148,13 @@ func CheckTranslateResult(required, optional MappableRange, at usermem.AccessTyp
if !t.Source.WellFormed() || t.Source.Length() == 0 {
return fmt.Errorf("Translation %+v has invalid Source", t)
}
- if !usermem.Addr(t.Source.Start).IsPageAligned() || !usermem.Addr(t.Source.End).IsPageAligned() {
+ if !hostarch.Addr(t.Source.Start).IsPageAligned() || !hostarch.Addr(t.Source.End).IsPageAligned() {
return fmt.Errorf("Translation %+v has unaligned Source", t)
}
if t.File == nil {
return fmt.Errorf("Translation %+v has nil File", t)
}
- if !usermem.Addr(t.Offset).IsPageAligned() {
+ if !hostarch.Addr(t.Offset).IsPageAligned() {
return fmt.Errorf("Translation %+v has unaligned Offset", t)
}
// Translations must be contiguous and in increasing order of
@@ -210,7 +210,7 @@ func (mr MappableRange) String() string {
return fmt.Sprintf("[%#x, %#x)", mr.Start, mr.End)
}
-// MappingSpace represents a mutable mapping from usermem.Addrs to (Mappable,
+// MappingSpace represents a mutable mapping from hostarch.Addrs to (Mappable,
// uint64 offset) pairs.
type MappingSpace interface {
// Invalidate is called to notify the MappingSpace that values returned by
@@ -223,7 +223,7 @@ type MappingSpace interface {
// Preconditions:
// * ar.Length() != 0.
// * ar must be page-aligned.
- Invalidate(ar usermem.AddrRange, opts InvalidateOpts)
+ Invalidate(ar hostarch.AddrRange, opts InvalidateOpts)
}
// InvalidateOpts holds options to MappingSpace.Invalidate.
@@ -321,7 +321,7 @@ type MMapOpts struct {
Offset uint64
// Addr is the suggested address for the mapping.
- Addr usermem.Addr
+ Addr hostarch.Addr
// Fixed specifies whether this is a fixed mapping (it must be located at
// Addr).
@@ -338,7 +338,7 @@ type MMapOpts struct {
Map32Bit bool
// Perms is the set of permissions to the applied to this mapping.
- Perms usermem.AccessType
+ Perms hostarch.AccessType
// MaxPerms limits the set of permissions that may ever apply to this
// mapping. If Mappable is not nil, all memmap.Translations returned by
@@ -346,7 +346,7 @@ type MMapOpts struct {
//
// Preconditions: MaxAccessType should be an effective AccessType, as
// access cannot be limited beyond effective AccessTypes.
- MaxPerms usermem.AccessType
+ MaxPerms hostarch.AccessType
// Private is true if writes to the mapping should be propagated to a copy
// that is exclusive to the MemoryManager.
@@ -375,6 +375,11 @@ type MMapOpts struct {
//
// If Force is true, Unmap and Fixed must be true.
Force bool
+
+ // SentryOwnedContent indicates the sentry exclusively controls the
+ // underlying memory backing the mapping thus the memory content is
+ // guaranteed not to be modified outside the sentry's purview.
+ SentryOwnedContent bool
}
// File represents a host file that may be mapped into an platform.AddressSpace.
@@ -410,7 +415,7 @@ type File interface {
//
// Postconditions: The returned mapping is valid as long as at least one
// reference is held on the mapped pages.
- MapInternal(fr FileRange, at usermem.AccessType) (safemem.BlockSeq, error)
+ MapInternal(fr FileRange, at hostarch.AccessType) (safemem.BlockSeq, error)
// FD returns the file descriptor represented by the File.
//
diff --git a/pkg/sentry/mm/BUILD b/pkg/sentry/mm/BUILD
index 6dbeccfe2..b417c2da7 100644
--- a/pkg/sentry/mm/BUILD
+++ b/pkg/sentry/mm/BUILD
@@ -28,14 +28,14 @@ go_template_instance(
"trackGaps": "1",
},
imports = {
- "usermem": "gvisor.dev/gvisor/pkg/usermem",
+ "hostarch": "gvisor.dev/gvisor/pkg/hostarch",
},
package = "mm",
prefix = "vma",
template = "//pkg/segment:generic_set",
types = {
- "Key": "usermem.Addr",
- "Range": "usermem.AddrRange",
+ "Key": "hostarch.Addr",
+ "Range": "hostarch.AddrRange",
"Value": "vma",
"Functions": "vmaSetFunctions",
},
@@ -48,14 +48,14 @@ go_template_instance(
"minDegree": "8",
},
imports = {
- "usermem": "gvisor.dev/gvisor/pkg/usermem",
+ "hostarch": "gvisor.dev/gvisor/pkg/hostarch",
},
package = "mm",
prefix = "pma",
template = "//pkg/segment:generic_set",
types = {
- "Key": "usermem.Addr",
- "Range": "usermem.AddrRange",
+ "Key": "hostarch.Addr",
+ "Range": "hostarch.AddrRange",
"Value": "pma",
"Functions": "pmaSetFunctions",
},
@@ -125,6 +125,7 @@ go_library(
"//pkg/abi/linux",
"//pkg/atomicbitops",
"//pkg/context",
+ "//pkg/hostarch",
"//pkg/log",
"//pkg/refs",
"//pkg/refsvfs2",
@@ -155,6 +156,7 @@ go_test(
library = ":mm",
deps = [
"//pkg/context",
+ "//pkg/hostarch",
"//pkg/sentry/arch",
"//pkg/sentry/contexttest",
"//pkg/sentry/limits",
diff --git a/pkg/sentry/mm/address_space.go b/pkg/sentry/mm/address_space.go
index a93e76c75..534e0e957 100644
--- a/pkg/sentry/mm/address_space.go
+++ b/pkg/sentry/mm/address_space.go
@@ -19,8 +19,8 @@ import (
"sync/atomic"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/platform"
- "gvisor.dev/gvisor/pkg/usermem"
)
// AddressSpace returns the platform.AddressSpace bound to mm.
@@ -172,17 +172,17 @@ func (mm *MemoryManager) Deactivate() {
// * ar.Length() != 0.
// * ar must be page-aligned.
// * pseg == mm.pmas.LowerBoundSegment(ar.Start).
-func (mm *MemoryManager) mapASLocked(pseg pmaIterator, ar usermem.AddrRange, precommit bool) error {
+func (mm *MemoryManager) mapASLocked(pseg pmaIterator, ar hostarch.AddrRange, precommit bool) error {
// By default, map entire pmas at a time, under the assumption that there
// is no cost to mapping more of a pma than necessary.
- mapAR := usermem.AddrRange{0, ^usermem.Addr(usermem.PageSize - 1)}
+ mapAR := hostarch.AddrRange{0, ^hostarch.Addr(hostarch.PageSize - 1)}
if precommit {
// When explicitly precommitting, only map ar, since overmapping may
// incur unexpected resource usage.
mapAR = ar
} else if mapUnit := mm.p.MapUnit(); mapUnit != 0 {
// Limit the range we map to ar, aligned to mapUnit.
- mapMask := usermem.Addr(mapUnit - 1)
+ mapMask := hostarch.Addr(mapUnit - 1)
mapAR.Start = ar.Start &^ mapMask
// If rounding ar.End up overflows, just keep the existing mapAR.End.
if end := (ar.End + mapMask) &^ mapMask; end >= ar.End {
@@ -218,7 +218,7 @@ func (mm *MemoryManager) mapASLocked(pseg pmaIterator, ar usermem.AddrRange, pre
// unmapASLocked removes all AddressSpace mappings for addresses in ar.
//
// Preconditions: mm.activeMu must be locked.
-func (mm *MemoryManager) unmapASLocked(ar usermem.AddrRange) {
+func (mm *MemoryManager) unmapASLocked(ar hostarch.AddrRange) {
if mm.as == nil {
// No AddressSpace? Force all mappings to be unmapped on the next
// Activate.
diff --git a/pkg/sentry/mm/aio_context.go b/pkg/sentry/mm/aio_context.go
index 5ab2ef79f..346866d3c 100644
--- a/pkg/sentry/mm/aio_context.go
+++ b/pkg/sentry/mm/aio_context.go
@@ -17,6 +17,7 @@ package mm
import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/pgalloc"
"gvisor.dev/gvisor/pkg/sentry/usage"
@@ -83,7 +84,7 @@ func (mm *MemoryManager) destroyAIOContextLocked(ctx context.Context, id uint64)
// the same address. Then it would be unmapping memory that it doesn't own.
// This is, however, the way Linux implements AIO. Keeps the same [weird]
// semantics in case anyone relies on it.
- mm.MUnmap(ctx, usermem.Addr(id), aioRingBufferSize)
+ mm.MUnmap(ctx, hostarch.Addr(id), aioRingBufferSize)
delete(mm.aioManager.contexts, id)
aioCtx.destroy()
@@ -259,7 +260,7 @@ type aioMappable struct {
fr memmap.FileRange
}
-var aioRingBufferSize = uint64(usermem.Addr(linux.AIORingSize).MustRoundUp())
+var aioRingBufferSize = uint64(hostarch.Addr(linux.AIORingSize).MustRoundUp())
func newAIOMappable(mfp pgalloc.MemoryFileProvider) (*aioMappable, error) {
fr, err := mfp.MemoryFile().Allocate(aioRingBufferSize, usage.Anonymous)
@@ -300,7 +301,7 @@ func (m *aioMappable) Msync(ctx context.Context, mr memmap.MappableRange) error
}
// AddMapping implements memmap.Mappable.AddMapping.
-func (m *aioMappable) AddMapping(_ context.Context, _ memmap.MappingSpace, ar usermem.AddrRange, offset uint64, _ bool) error {
+func (m *aioMappable) AddMapping(_ context.Context, _ memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, _ bool) error {
// Don't allow mappings to be expanded (in Linux, fs/aio.c:aio_ring_mmap()
// sets VM_DONTEXPAND).
if offset != 0 || uint64(ar.Length()) != aioRingBufferSize {
@@ -310,11 +311,11 @@ func (m *aioMappable) AddMapping(_ context.Context, _ memmap.MappingSpace, ar us
}
// RemoveMapping implements memmap.Mappable.RemoveMapping.
-func (m *aioMappable) RemoveMapping(context.Context, memmap.MappingSpace, usermem.AddrRange, uint64, bool) {
+func (m *aioMappable) RemoveMapping(context.Context, memmap.MappingSpace, hostarch.AddrRange, uint64, bool) {
}
// CopyMapping implements memmap.Mappable.CopyMapping.
-func (m *aioMappable) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR usermem.AddrRange, offset uint64, _ bool) error {
+func (m *aioMappable) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, _ bool) error {
// Don't allow mappings to be expanded (in Linux, fs/aio.c:aio_ring_mmap()
// sets VM_DONTEXPAND).
if offset != 0 || uint64(dstAR.Length()) != aioRingBufferSize {
@@ -346,7 +347,7 @@ func (m *aioMappable) CopyMapping(ctx context.Context, ms memmap.MappingSpace, s
}
// Translate implements memmap.Mappable.Translate.
-func (m *aioMappable) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) {
+func (m *aioMappable) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) {
var err error
if required.End > m.fr.Length() {
err = &memmap.BusError{syserror.EFAULT}
@@ -357,7 +358,7 @@ func (m *aioMappable) Translate(ctx context.Context, required, optional memmap.M
Source: source,
File: m.mfp.MemoryFile(),
Offset: m.fr.Start + source.Start,
- Perms: usermem.AnyAccess,
+ Perms: hostarch.AnyAccess,
},
}, err
}
@@ -389,8 +390,8 @@ func (mm *MemoryManager) NewAIOContext(ctx context.Context, events uint32) (uint
// Linux uses "do_mmap_pgoff(..., PROT_READ | PROT_WRITE, ...)" in
// fs/aio.c:aio_setup_ring(). Since we don't implement AIO_RING_MAGIC,
// user mode should not write to this page.
- Perms: usermem.Read,
- MaxPerms: usermem.Read,
+ Perms: hostarch.Read,
+ MaxPerms: hostarch.Read,
})
if err != nil {
return 0, err
@@ -435,6 +436,6 @@ func (mm *MemoryManager) LookupAIOContext(ctx context.Context, id uint64) (*AIOC
// bytes from id).
func (mm *MemoryManager) isValidAddr(ctx context.Context, id uint64) bool {
var buf [4]byte
- _, err := mm.CopyIn(ctx, usermem.Addr(id), buf[:], usermem.IOOpts{})
+ _, err := mm.CopyIn(ctx, hostarch.Addr(id), buf[:], usermem.IOOpts{})
return err == nil
}
diff --git a/pkg/sentry/mm/io.go b/pkg/sentry/mm/io.go
index a8ac48080..16f318ab3 100644
--- a/pkg/sentry/mm/io.go
+++ b/pkg/sentry/mm/io.go
@@ -16,6 +16,7 @@ package mm
import (
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/platform"
"gvisor.dev/gvisor/pkg/syserror"
@@ -60,11 +61,11 @@ const (
rwMapMinBytes = 512
)
-// CheckIORange is similar to usermem.Addr.ToRange, but applies bounds checks
+// CheckIORange is similar to hostarch.Addr.ToRange, but applies bounds checks
// consistent with Linux's arch/x86/include/asm/uaccess.h:access_ok().
//
// Preconditions: length >= 0.
-func (mm *MemoryManager) CheckIORange(addr usermem.Addr, length int64) (usermem.AddrRange, bool) {
+func (mm *MemoryManager) CheckIORange(addr hostarch.Addr, length int64) (hostarch.AddrRange, bool) {
// Note that access_ok() constrains end even if length == 0.
ar, ok := addr.ToRange(uint64(length))
return ar, (ok && ar.End <= mm.layout.MaxAddr)
@@ -72,7 +73,7 @@ func (mm *MemoryManager) CheckIORange(addr usermem.Addr, length int64) (usermem.
// checkIOVec applies bound checks consistent with Linux's
// arch/x86/include/asm/uaccess.h:access_ok() to ars.
-func (mm *MemoryManager) checkIOVec(ars usermem.AddrRangeSeq) bool {
+func (mm *MemoryManager) checkIOVec(ars hostarch.AddrRangeSeq) bool {
for !ars.IsEmpty() {
ar := ars.Head()
if _, ok := mm.CheckIORange(ar.Start, int64(ar.Length())); !ok {
@@ -100,7 +101,7 @@ func translateIOError(ctx context.Context, err error) error {
}
// CopyOut implements usermem.IO.CopyOut.
-func (mm *MemoryManager) CopyOut(ctx context.Context, addr usermem.Addr, src []byte, opts usermem.IOOpts) (int, error) {
+func (mm *MemoryManager) CopyOut(ctx context.Context, addr hostarch.Addr, src []byte, opts usermem.IOOpts) (int, error) {
ar, ok := mm.CheckIORange(addr, int64(len(src)))
if !ok {
return 0, syserror.EFAULT
@@ -116,24 +117,24 @@ func (mm *MemoryManager) CopyOut(ctx context.Context, addr usermem.Addr, src []b
}
// Go through internal mappings.
- n64, err := mm.withInternalMappings(ctx, ar, usermem.Write, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) {
+ n64, err := mm.withInternalMappings(ctx, ar, hostarch.Write, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) {
n, err := safemem.CopySeq(ims, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(src)))
return n, translateIOError(ctx, err)
})
return int(n64), err
}
-func (mm *MemoryManager) asCopyOut(ctx context.Context, addr usermem.Addr, src []byte) (int, error) {
+func (mm *MemoryManager) asCopyOut(ctx context.Context, addr hostarch.Addr, src []byte) (int, error) {
var done int
for {
- n, err := mm.as.CopyOut(addr+usermem.Addr(done), src[done:])
+ n, err := mm.as.CopyOut(addr+hostarch.Addr(done), src[done:])
done += n
if err == nil {
return done, nil
}
if f, ok := err.(platform.SegmentationFault); ok {
ar, _ := addr.ToRange(uint64(len(src)))
- if err := mm.handleASIOFault(ctx, f.Addr, ar, usermem.Write); err != nil {
+ if err := mm.handleASIOFault(ctx, f.Addr, ar, hostarch.Write); err != nil {
return done, err
}
continue
@@ -143,7 +144,7 @@ func (mm *MemoryManager) asCopyOut(ctx context.Context, addr usermem.Addr, src [
}
// CopyIn implements usermem.IO.CopyIn.
-func (mm *MemoryManager) CopyIn(ctx context.Context, addr usermem.Addr, dst []byte, opts usermem.IOOpts) (int, error) {
+func (mm *MemoryManager) CopyIn(ctx context.Context, addr hostarch.Addr, dst []byte, opts usermem.IOOpts) (int, error) {
ar, ok := mm.CheckIORange(addr, int64(len(dst)))
if !ok {
return 0, syserror.EFAULT
@@ -159,24 +160,24 @@ func (mm *MemoryManager) CopyIn(ctx context.Context, addr usermem.Addr, dst []by
}
// Go through internal mappings.
- n64, err := mm.withInternalMappings(ctx, ar, usermem.Read, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) {
+ n64, err := mm.withInternalMappings(ctx, ar, hostarch.Read, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) {
n, err := safemem.CopySeq(safemem.BlockSeqOf(safemem.BlockFromSafeSlice(dst)), ims)
return n, translateIOError(ctx, err)
})
return int(n64), err
}
-func (mm *MemoryManager) asCopyIn(ctx context.Context, addr usermem.Addr, dst []byte) (int, error) {
+func (mm *MemoryManager) asCopyIn(ctx context.Context, addr hostarch.Addr, dst []byte) (int, error) {
var done int
for {
- n, err := mm.as.CopyIn(addr+usermem.Addr(done), dst[done:])
+ n, err := mm.as.CopyIn(addr+hostarch.Addr(done), dst[done:])
done += n
if err == nil {
return done, nil
}
if f, ok := err.(platform.SegmentationFault); ok {
ar, _ := addr.ToRange(uint64(len(dst)))
- if err := mm.handleASIOFault(ctx, f.Addr, ar, usermem.Read); err != nil {
+ if err := mm.handleASIOFault(ctx, f.Addr, ar, hostarch.Read); err != nil {
return done, err
}
continue
@@ -186,7 +187,7 @@ func (mm *MemoryManager) asCopyIn(ctx context.Context, addr usermem.Addr, dst []
}
// ZeroOut implements usermem.IO.ZeroOut.
-func (mm *MemoryManager) ZeroOut(ctx context.Context, addr usermem.Addr, toZero int64, opts usermem.IOOpts) (int64, error) {
+func (mm *MemoryManager) ZeroOut(ctx context.Context, addr hostarch.Addr, toZero int64, opts usermem.IOOpts) (int64, error) {
ar, ok := mm.CheckIORange(addr, toZero)
if !ok {
return 0, syserror.EFAULT
@@ -202,23 +203,23 @@ func (mm *MemoryManager) ZeroOut(ctx context.Context, addr usermem.Addr, toZero
}
// Go through internal mappings.
- return mm.withInternalMappings(ctx, ar, usermem.Write, opts.IgnorePermissions, func(dsts safemem.BlockSeq) (uint64, error) {
+ return mm.withInternalMappings(ctx, ar, hostarch.Write, opts.IgnorePermissions, func(dsts safemem.BlockSeq) (uint64, error) {
n, err := safemem.ZeroSeq(dsts)
return n, translateIOError(ctx, err)
})
}
-func (mm *MemoryManager) asZeroOut(ctx context.Context, addr usermem.Addr, toZero int64) (int64, error) {
+func (mm *MemoryManager) asZeroOut(ctx context.Context, addr hostarch.Addr, toZero int64) (int64, error) {
var done int64
for {
- n, err := mm.as.ZeroOut(addr+usermem.Addr(done), uintptr(toZero-done))
+ n, err := mm.as.ZeroOut(addr+hostarch.Addr(done), uintptr(toZero-done))
done += int64(n)
if err == nil {
return done, nil
}
if f, ok := err.(platform.SegmentationFault); ok {
ar, _ := addr.ToRange(uint64(toZero))
- if err := mm.handleASIOFault(ctx, f.Addr, ar, usermem.Write); err != nil {
+ if err := mm.handleASIOFault(ctx, f.Addr, ar, hostarch.Write); err != nil {
return done, err
}
continue
@@ -228,7 +229,7 @@ func (mm *MemoryManager) asZeroOut(ctx context.Context, addr usermem.Addr, toZer
}
// CopyOutFrom implements usermem.IO.CopyOutFrom.
-func (mm *MemoryManager) CopyOutFrom(ctx context.Context, ars usermem.AddrRangeSeq, src safemem.Reader, opts usermem.IOOpts) (int64, error) {
+func (mm *MemoryManager) CopyOutFrom(ctx context.Context, ars hostarch.AddrRangeSeq, src safemem.Reader, opts usermem.IOOpts) (int64, error) {
if !mm.checkIOVec(ars) {
return 0, syserror.EFAULT
}
@@ -269,11 +270,11 @@ func (mm *MemoryManager) CopyOutFrom(ctx context.Context, ars usermem.AddrRangeS
}
// Go through internal mappings.
- return mm.withVecInternalMappings(ctx, ars, usermem.Write, opts.IgnorePermissions, src.ReadToBlocks)
+ return mm.withVecInternalMappings(ctx, ars, hostarch.Write, opts.IgnorePermissions, src.ReadToBlocks)
}
// CopyInTo implements usermem.IO.CopyInTo.
-func (mm *MemoryManager) CopyInTo(ctx context.Context, ars usermem.AddrRangeSeq, dst safemem.Writer, opts usermem.IOOpts) (int64, error) {
+func (mm *MemoryManager) CopyInTo(ctx context.Context, ars hostarch.AddrRangeSeq, dst safemem.Writer, opts usermem.IOOpts) (int64, error) {
if !mm.checkIOVec(ars) {
return 0, syserror.EFAULT
}
@@ -306,11 +307,11 @@ func (mm *MemoryManager) CopyInTo(ctx context.Context, ars usermem.AddrRangeSeq,
}
// Go through internal mappings.
- return mm.withVecInternalMappings(ctx, ars, usermem.Read, opts.IgnorePermissions, dst.WriteFromBlocks)
+ return mm.withVecInternalMappings(ctx, ars, hostarch.Read, opts.IgnorePermissions, dst.WriteFromBlocks)
}
// SwapUint32 implements usermem.IO.SwapUint32.
-func (mm *MemoryManager) SwapUint32(ctx context.Context, addr usermem.Addr, new uint32, opts usermem.IOOpts) (uint32, error) {
+func (mm *MemoryManager) SwapUint32(ctx context.Context, addr hostarch.Addr, new uint32, opts usermem.IOOpts) (uint32, error) {
ar, ok := mm.CheckIORange(addr, 4)
if !ok {
return 0, syserror.EFAULT
@@ -324,7 +325,7 @@ func (mm *MemoryManager) SwapUint32(ctx context.Context, addr usermem.Addr, new
return old, nil
}
if f, ok := err.(platform.SegmentationFault); ok {
- if err := mm.handleASIOFault(ctx, f.Addr, ar, usermem.ReadWrite); err != nil {
+ if err := mm.handleASIOFault(ctx, f.Addr, ar, hostarch.ReadWrite); err != nil {
return 0, err
}
continue
@@ -335,7 +336,7 @@ func (mm *MemoryManager) SwapUint32(ctx context.Context, addr usermem.Addr, new
// Go through internal mappings.
var old uint32
- _, err := mm.withInternalMappings(ctx, ar, usermem.ReadWrite, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) {
+ _, err := mm.withInternalMappings(ctx, ar, hostarch.ReadWrite, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) {
if ims.NumBlocks() != 1 || ims.NumBytes() != 4 {
// Atomicity is unachievable across mappings.
return 0, syserror.EFAULT
@@ -353,7 +354,7 @@ func (mm *MemoryManager) SwapUint32(ctx context.Context, addr usermem.Addr, new
}
// CompareAndSwapUint32 implements usermem.IO.CompareAndSwapUint32.
-func (mm *MemoryManager) CompareAndSwapUint32(ctx context.Context, addr usermem.Addr, old, new uint32, opts usermem.IOOpts) (uint32, error) {
+func (mm *MemoryManager) CompareAndSwapUint32(ctx context.Context, addr hostarch.Addr, old, new uint32, opts usermem.IOOpts) (uint32, error) {
ar, ok := mm.CheckIORange(addr, 4)
if !ok {
return 0, syserror.EFAULT
@@ -367,7 +368,7 @@ func (mm *MemoryManager) CompareAndSwapUint32(ctx context.Context, addr usermem.
return prev, nil
}
if f, ok := err.(platform.SegmentationFault); ok {
- if err := mm.handleASIOFault(ctx, f.Addr, ar, usermem.ReadWrite); err != nil {
+ if err := mm.handleASIOFault(ctx, f.Addr, ar, hostarch.ReadWrite); err != nil {
return 0, err
}
continue
@@ -378,7 +379,7 @@ func (mm *MemoryManager) CompareAndSwapUint32(ctx context.Context, addr usermem.
// Go through internal mappings.
var prev uint32
- _, err := mm.withInternalMappings(ctx, ar, usermem.ReadWrite, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) {
+ _, err := mm.withInternalMappings(ctx, ar, hostarch.ReadWrite, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) {
if ims.NumBlocks() != 1 || ims.NumBytes() != 4 {
// Atomicity is unachievable across mappings.
return 0, syserror.EFAULT
@@ -396,7 +397,7 @@ func (mm *MemoryManager) CompareAndSwapUint32(ctx context.Context, addr usermem.
}
// LoadUint32 implements usermem.IO.LoadUint32.
-func (mm *MemoryManager) LoadUint32(ctx context.Context, addr usermem.Addr, opts usermem.IOOpts) (uint32, error) {
+func (mm *MemoryManager) LoadUint32(ctx context.Context, addr hostarch.Addr, opts usermem.IOOpts) (uint32, error) {
ar, ok := mm.CheckIORange(addr, 4)
if !ok {
return 0, syserror.EFAULT
@@ -410,7 +411,7 @@ func (mm *MemoryManager) LoadUint32(ctx context.Context, addr usermem.Addr, opts
return val, nil
}
if f, ok := err.(platform.SegmentationFault); ok {
- if err := mm.handleASIOFault(ctx, f.Addr, ar, usermem.Read); err != nil {
+ if err := mm.handleASIOFault(ctx, f.Addr, ar, hostarch.Read); err != nil {
return 0, err
}
continue
@@ -421,7 +422,7 @@ func (mm *MemoryManager) LoadUint32(ctx context.Context, addr usermem.Addr, opts
// Go through internal mappings.
var val uint32
- _, err := mm.withInternalMappings(ctx, ar, usermem.Read, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) {
+ _, err := mm.withInternalMappings(ctx, ar, hostarch.Read, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) {
if ims.NumBlocks() != 1 || ims.NumBytes() != 4 {
// Atomicity is unachievable across mappings.
return 0, syserror.EFAULT
@@ -445,11 +446,11 @@ func (mm *MemoryManager) LoadUint32(ctx context.Context, addr usermem.Addr, opts
// * mm.as != nil.
// * ioar.Length() != 0.
// * ioar.Contains(addr).
-func (mm *MemoryManager) handleASIOFault(ctx context.Context, addr usermem.Addr, ioar usermem.AddrRange, at usermem.AccessType) error {
+func (mm *MemoryManager) handleASIOFault(ctx context.Context, addr hostarch.Addr, ioar hostarch.AddrRange, at hostarch.AccessType) error {
// Try to map all remaining pages in the I/O operation. This RoundUp can't
// overflow because otherwise it would have been caught by CheckIORange.
end, _ := ioar.End.RoundUp()
- ar := usermem.AddrRange{addr.RoundDown(), end}
+ ar := hostarch.AddrRange{addr.RoundDown(), end}
// Don't bother trying existingPMAsLocked; in most cases, if we did have
// existing pmas, we wouldn't have faulted.
@@ -498,7 +499,7 @@ func (mm *MemoryManager) handleASIOFault(ctx context.Context, addr usermem.Addr,
// more useful for usermem.IO methods.
//
// Preconditions: 0 < ar.Length() <= math.MaxInt64.
-func (mm *MemoryManager) withInternalMappings(ctx context.Context, ar usermem.AddrRange, at usermem.AccessType, ignorePermissions bool, f func(safemem.BlockSeq) (uint64, error)) (int64, error) {
+func (mm *MemoryManager) withInternalMappings(ctx context.Context, ar hostarch.AddrRange, at hostarch.AccessType, ignorePermissions bool, f func(safemem.BlockSeq) (uint64, error)) (int64, error) {
// If pmas are already available, we can do IO without touching mm.vmas or
// mm.mappingMu.
mm.activeMu.RLock()
@@ -567,7 +568,7 @@ func (mm *MemoryManager) withInternalMappings(ctx context.Context, ar usermem.Ad
// internal mappings for the subset of ars for which this property holds.
//
// Preconditions: !ars.IsEmpty().
-func (mm *MemoryManager) withVecInternalMappings(ctx context.Context, ars usermem.AddrRangeSeq, at usermem.AccessType, ignorePermissions bool, f func(safemem.BlockSeq) (uint64, error)) (int64, error) {
+func (mm *MemoryManager) withVecInternalMappings(ctx context.Context, ars hostarch.AddrRangeSeq, at hostarch.AccessType, ignorePermissions bool, f func(safemem.BlockSeq) (uint64, error)) (int64, error) {
// withInternalMappings is faster than withVecInternalMappings because of
// iterator plumbing (this isn't generally practical in the vector case due
// to iterator invalidation between AddrRanges). Use it if possible.
@@ -630,12 +631,12 @@ func (mm *MemoryManager) withVecInternalMappings(ctx context.Context, ars userme
// truncatedAddrRangeSeq returns a copy of ars, but with the end truncated to
// at most address end on AddrRange arsit.Head(). It is used in vector I/O paths to
-// truncate usermem.AddrRangeSeq when errors occur.
+// truncate hostarch.AddrRangeSeq when errors occur.
//
// Preconditions:
// * !arsit.IsEmpty().
// * end <= arsit.Head().End.
-func truncatedAddrRangeSeq(ars, arsit usermem.AddrRangeSeq, end usermem.Addr) usermem.AddrRangeSeq {
+func truncatedAddrRangeSeq(ars, arsit hostarch.AddrRangeSeq, end hostarch.Addr) hostarch.AddrRangeSeq {
ar := arsit.Head()
if end <= ar.Start {
return ars.TakeFirst64(ars.NumBytes() - arsit.NumBytes())
diff --git a/pkg/sentry/mm/lifecycle.go b/pkg/sentry/mm/lifecycle.go
index 120707429..a79ef9223 100644
--- a/pkg/sentry/mm/lifecycle.go
+++ b/pkg/sentry/mm/lifecycle.go
@@ -19,12 +19,12 @@ import (
"sync/atomic"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/limits"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/pgalloc"
"gvisor.dev/gvisor/pkg/sentry/platform"
- "gvisor.dev/gvisor/pkg/usermem"
)
// NewMemoryManager returns a new MemoryManager with no mappings and 1 user.
@@ -139,7 +139,7 @@ func (mm *MemoryManager) Fork(ctx context.Context) (*MemoryManager, error) {
}
srcvseg := mm.vmas.FirstSegment()
dstpgap := mm2.pmas.FirstGap()
- var unmapAR usermem.AddrRange
+ var unmapAR hostarch.AddrRange
for srcpseg := mm.pmas.FirstSegment(); srcpseg.Ok(); srcpseg = srcpseg.NextSegment() {
pma := srcpseg.ValuePtr()
if !pma.private {
diff --git a/pkg/sentry/mm/metadata.go b/pkg/sentry/mm/metadata.go
index 0cfd60f6c..28c5fead9 100644
--- a/pkg/sentry/mm/metadata.go
+++ b/pkg/sentry/mm/metadata.go
@@ -16,9 +16,9 @@ package mm
import (
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/fsbridge"
- "gvisor.dev/gvisor/pkg/usermem"
)
// Dumpability describes if and how core dumps should be created.
@@ -54,14 +54,14 @@ func (mm *MemoryManager) SetDumpability(d Dumpability) {
// ArgvStart returns the start of the application argument vector.
//
// There is no guarantee that this value is sensible w.r.t. ArgvEnd.
-func (mm *MemoryManager) ArgvStart() usermem.Addr {
+func (mm *MemoryManager) ArgvStart() hostarch.Addr {
mm.metadataMu.Lock()
defer mm.metadataMu.Unlock()
return mm.argv.Start
}
// SetArgvStart sets the start of the application argument vector.
-func (mm *MemoryManager) SetArgvStart(a usermem.Addr) {
+func (mm *MemoryManager) SetArgvStart(a hostarch.Addr) {
mm.metadataMu.Lock()
defer mm.metadataMu.Unlock()
mm.argv.Start = a
@@ -70,14 +70,14 @@ func (mm *MemoryManager) SetArgvStart(a usermem.Addr) {
// ArgvEnd returns the end of the application argument vector.
//
// There is no guarantee that this value is sensible w.r.t. ArgvStart.
-func (mm *MemoryManager) ArgvEnd() usermem.Addr {
+func (mm *MemoryManager) ArgvEnd() hostarch.Addr {
mm.metadataMu.Lock()
defer mm.metadataMu.Unlock()
return mm.argv.End
}
// SetArgvEnd sets the end of the application argument vector.
-func (mm *MemoryManager) SetArgvEnd(a usermem.Addr) {
+func (mm *MemoryManager) SetArgvEnd(a hostarch.Addr) {
mm.metadataMu.Lock()
defer mm.metadataMu.Unlock()
mm.argv.End = a
@@ -86,14 +86,14 @@ func (mm *MemoryManager) SetArgvEnd(a usermem.Addr) {
// EnvvStart returns the start of the application environment vector.
//
// There is no guarantee that this value is sensible w.r.t. EnvvEnd.
-func (mm *MemoryManager) EnvvStart() usermem.Addr {
+func (mm *MemoryManager) EnvvStart() hostarch.Addr {
mm.metadataMu.Lock()
defer mm.metadataMu.Unlock()
return mm.envv.Start
}
// SetEnvvStart sets the start of the application environment vector.
-func (mm *MemoryManager) SetEnvvStart(a usermem.Addr) {
+func (mm *MemoryManager) SetEnvvStart(a hostarch.Addr) {
mm.metadataMu.Lock()
defer mm.metadataMu.Unlock()
mm.envv.Start = a
@@ -102,14 +102,14 @@ func (mm *MemoryManager) SetEnvvStart(a usermem.Addr) {
// EnvvEnd returns the end of the application environment vector.
//
// There is no guarantee that this value is sensible w.r.t. EnvvStart.
-func (mm *MemoryManager) EnvvEnd() usermem.Addr {
+func (mm *MemoryManager) EnvvEnd() hostarch.Addr {
mm.metadataMu.Lock()
defer mm.metadataMu.Unlock()
return mm.envv.End
}
// SetEnvvEnd sets the end of the application environment vector.
-func (mm *MemoryManager) SetEnvvEnd(a usermem.Addr) {
+func (mm *MemoryManager) SetEnvvEnd(a hostarch.Addr) {
mm.metadataMu.Lock()
defer mm.metadataMu.Unlock()
mm.envv.End = a
diff --git a/pkg/sentry/mm/mm.go b/pkg/sentry/mm/mm.go
index 92cc87d84..57969b26c 100644
--- a/pkg/sentry/mm/mm.go
+++ b/pkg/sentry/mm/mm.go
@@ -36,6 +36,7 @@ package mm
import (
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/fsbridge"
@@ -43,7 +44,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/pgalloc"
"gvisor.dev/gvisor/pkg/sentry/platform"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/usermem"
)
// MemoryManager implements a virtual address space.
@@ -97,7 +97,7 @@ type MemoryManager struct {
// binary into the mm.
//
// brk is protected by mappingMu.
- brk usermem.AddrRange
+ brk hostarch.AddrRange
// usageAS is vmas.Span(), cached to accelerate RLIMIT_AS checks.
//
@@ -198,14 +198,14 @@ type MemoryManager struct {
// requirements apply to argv; we do not require that argv.WellFormed().
//
// argv is protected by metadataMu.
- argv usermem.AddrRange
+ argv hostarch.AddrRange
// envv is the application envv. This is set up by the loader and may be
// modified by prctl(PR_SET_MM_ENV_START/PR_SET_MM_ENV_END). No
// requirements apply to envv; we do not require that envv.WellFormed().
//
// envv is protected by metadataMu.
- envv usermem.AddrRange
+ envv hostarch.AddrRange
// auxv is the ELF's auxiliary vector.
//
@@ -268,20 +268,20 @@ type vma struct {
// realPerms are the memory permissions on this vma, as defined by the
// application.
- realPerms usermem.AccessType `state:".(int)"`
+ realPerms hostarch.AccessType `state:".(int)"`
// effectivePerms are the memory permissions on this vma which are
// actually used to control access.
//
// Invariant: effectivePerms == realPerms.Effective().
- effectivePerms usermem.AccessType `state:"manual"`
+ effectivePerms hostarch.AccessType `state:"manual"`
// maxPerms limits the set of permissions that may ever apply to this
// memory, as well as accesses for which usermem.IOOpts.IgnorePermissions
// is true (e.g. ptrace(PTRACE_POKEDATA)).
//
// Invariant: maxPerms == maxPerms.Effective().
- maxPerms usermem.AccessType `state:"manual"`
+ maxPerms hostarch.AccessType `state:"manual"`
// private is true if this is a MAP_PRIVATE mapping, such that writes to
// the mapping are propagated to a copy.
@@ -421,8 +421,8 @@ type pma struct {
off uint64
// translatePerms is the permissions returned by memmap.Mappable.Translate.
- // If private is true, translatePerms is usermem.AnyAccess.
- translatePerms usermem.AccessType
+ // If private is true, translatePerms is hostarch.AnyAccess.
+ translatePerms hostarch.AccessType
// effectivePerms is the permissions allowed for non-ignorePermissions
// accesses. maxPerms is the permissions allowed for ignorePermissions
@@ -432,8 +432,8 @@ type pma struct {
//
// These are stored in the pma so that the IO implementation can avoid
// iterating mm.vmas when pmas already exist.
- effectivePerms usermem.AccessType
- maxPerms usermem.AccessType
+ effectivePerms hostarch.AccessType
+ maxPerms hostarch.AccessType
// needCOW is true if writes to the mapping must be propagated to a copy.
needCOW bool
@@ -465,7 +465,7 @@ type privateRefs struct {
}
type invalidateArgs struct {
- ar usermem.AddrRange
+ ar hostarch.AddrRange
opts memmap.InvalidateOpts
}
diff --git a/pkg/sentry/mm/mm_test.go b/pkg/sentry/mm/mm_test.go
index bc53bd41e..1304b0a2f 100644
--- a/pkg/sentry/mm/mm_test.go
+++ b/pkg/sentry/mm/mm_test.go
@@ -18,6 +18,7 @@ import (
"testing"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/contexttest"
"gvisor.dev/gvisor/pkg/sentry/limits"
@@ -51,7 +52,7 @@ func TestUsageASUpdates(t *testing.T) {
defer mm.DecUsers(ctx)
addr, err := mm.MMap(ctx, memmap.MMapOpts{
- Length: 2 * usermem.PageSize,
+ Length: 2 * hostarch.PageSize,
Private: true,
})
if err != nil {
@@ -62,7 +63,7 @@ func TestUsageASUpdates(t *testing.T) {
t.Fatalf("usageAS believes %v bytes are mapped; %v bytes are actually mapped", mm.usageAS, realUsage)
}
- mm.MUnmap(ctx, addr, usermem.PageSize)
+ mm.MUnmap(ctx, addr, hostarch.PageSize)
realUsage = mm.realUsageAS()
if mm.usageAS != realUsage {
t.Fatalf("usageAS believes %v bytes are mapped; %v bytes are actually mapped", mm.usageAS, realUsage)
@@ -86,10 +87,10 @@ func TestDataASUpdates(t *testing.T) {
defer mm.DecUsers(ctx)
addr, err := mm.MMap(ctx, memmap.MMapOpts{
- Length: 3 * usermem.PageSize,
+ Length: 3 * hostarch.PageSize,
Private: true,
- Perms: usermem.Write,
- MaxPerms: usermem.AnyAccess,
+ Perms: hostarch.Write,
+ MaxPerms: hostarch.AnyAccess,
})
if err != nil {
t.Fatalf("MMap got err %v want nil", err)
@@ -102,19 +103,19 @@ func TestDataASUpdates(t *testing.T) {
t.Fatalf("dataAS believes %v bytes are mapped; %v bytes are actually mapped", mm.dataAS, realDataAS)
}
- mm.MUnmap(ctx, addr, usermem.PageSize)
+ mm.MUnmap(ctx, addr, hostarch.PageSize)
realDataAS = mm.realDataAS()
if mm.dataAS != realDataAS {
t.Fatalf("dataAS believes %v bytes are mapped; %v bytes are actually mapped", mm.dataAS, realDataAS)
}
- mm.MProtect(addr+usermem.PageSize, usermem.PageSize, usermem.Read, false)
+ mm.MProtect(addr+hostarch.PageSize, hostarch.PageSize, hostarch.Read, false)
realDataAS = mm.realDataAS()
if mm.dataAS != realDataAS {
t.Fatalf("dataAS believes %v bytes are mapped; %v bytes are actually mapped", mm.dataAS, realDataAS)
}
- mm.MRemap(ctx, addr+2*usermem.PageSize, usermem.PageSize, 2*usermem.PageSize, MRemapOpts{
+ mm.MRemap(ctx, addr+2*hostarch.PageSize, hostarch.PageSize, 2*hostarch.PageSize, MRemapOpts{
Move: MRemapMayMove,
})
realDataAS = mm.realDataAS()
@@ -133,7 +134,7 @@ func TestBrkDataLimitUpdates(t *testing.T) {
// Try to extend the brk by one page and expect doing so to fail.
oldBrk, _ := mm.Brk(ctx, 0)
- if newBrk, _ := mm.Brk(ctx, oldBrk+usermem.PageSize); newBrk != oldBrk {
+ if newBrk, _ := mm.Brk(ctx, oldBrk+hostarch.PageSize); newBrk != oldBrk {
t.Errorf("brk() increased data segment above RLIMIT_DATA (old brk = %#x, new brk = %#x", oldBrk, newBrk)
}
}
@@ -145,10 +146,10 @@ func TestIOAfterUnmap(t *testing.T) {
defer mm.DecUsers(ctx)
addr, err := mm.MMap(ctx, memmap.MMapOpts{
- Length: usermem.PageSize,
+ Length: hostarch.PageSize,
Private: true,
- Perms: usermem.Read,
- MaxPerms: usermem.AnyAccess,
+ Perms: hostarch.Read,
+ MaxPerms: hostarch.AnyAccess,
})
if err != nil {
t.Fatalf("MMap got err %v want nil", err)
@@ -164,7 +165,7 @@ func TestIOAfterUnmap(t *testing.T) {
t.Errorf("CopyIn got %d want 1", n)
}
- err = mm.MUnmap(ctx, addr, usermem.PageSize)
+ err = mm.MUnmap(ctx, addr, hostarch.PageSize)
if err != nil {
t.Fatalf("MUnmap got err %v want nil", err)
}
@@ -185,10 +186,10 @@ func TestIOAfterMProtect(t *testing.T) {
defer mm.DecUsers(ctx)
addr, err := mm.MMap(ctx, memmap.MMapOpts{
- Length: usermem.PageSize,
+ Length: hostarch.PageSize,
Private: true,
- Perms: usermem.ReadWrite,
- MaxPerms: usermem.AnyAccess,
+ Perms: hostarch.ReadWrite,
+ MaxPerms: hostarch.AnyAccess,
})
if err != nil {
t.Fatalf("MMap got err %v want nil", err)
@@ -204,7 +205,7 @@ func TestIOAfterMProtect(t *testing.T) {
t.Errorf("CopyOut got %d want 1", n)
}
- err = mm.MProtect(addr, usermem.PageSize, usermem.Read, false)
+ err = mm.MProtect(addr, hostarch.PageSize, hostarch.Read, false)
if err != nil {
t.Errorf("MProtect got err %v want nil", err)
}
diff --git a/pkg/sentry/mm/pma.go b/pkg/sentry/mm/pma.go
index 7e5f7de64..5583f62b2 100644
--- a/pkg/sentry/mm/pma.go
+++ b/pkg/sentry/mm/pma.go
@@ -18,12 +18,12 @@ import (
"fmt"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/safecopy"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/usage"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
// existingPMAsLocked checks that pmas exist for all addresses in ar, and
@@ -34,7 +34,7 @@ import (
// Preconditions:
// * mm.activeMu must be locked.
// * ar.Length() != 0.
-func (mm *MemoryManager) existingPMAsLocked(ar usermem.AddrRange, at usermem.AccessType, ignorePermissions bool, needInternalMappings bool) pmaIterator {
+func (mm *MemoryManager) existingPMAsLocked(ar hostarch.AddrRange, at hostarch.AccessType, ignorePermissions bool, needInternalMappings bool) pmaIterator {
if checkInvariants {
if !ar.WellFormed() || ar.Length() == 0 {
panic(fmt.Sprintf("invalid ar: %v", ar))
@@ -70,7 +70,7 @@ func (mm *MemoryManager) existingPMAsLocked(ar usermem.AddrRange, at usermem.Acc
// and support access of type (at, ignorePermissions).
//
// Preconditions: mm.activeMu must be locked.
-func (mm *MemoryManager) existingVecPMAsLocked(ars usermem.AddrRangeSeq, at usermem.AccessType, ignorePermissions bool, needInternalMappings bool) bool {
+func (mm *MemoryManager) existingVecPMAsLocked(ars hostarch.AddrRangeSeq, at hostarch.AccessType, ignorePermissions bool, needInternalMappings bool) bool {
for ; !ars.IsEmpty(); ars = ars.Tail() {
if ar := ars.Head(); ar.Length() != 0 && !mm.existingPMAsLocked(ar, at, ignorePermissions, needInternalMappings).Ok() {
return false
@@ -98,7 +98,7 @@ func (mm *MemoryManager) existingVecPMAsLocked(ars usermem.AddrRangeSeq, at user
// * vseg.Range().Contains(ar.Start).
// * vmas must exist for all addresses in ar, and support accesses of type at
// (i.e. permission checks must have been performed against vmas).
-func (mm *MemoryManager) getPMAsLocked(ctx context.Context, vseg vmaIterator, ar usermem.AddrRange, at usermem.AccessType) (pmaIterator, pmaGapIterator, error) {
+func (mm *MemoryManager) getPMAsLocked(ctx context.Context, vseg vmaIterator, ar hostarch.AddrRange, at hostarch.AccessType) (pmaIterator, pmaGapIterator, error) {
if checkInvariants {
if !ar.WellFormed() || ar.Length() == 0 {
panic(fmt.Sprintf("invalid ar: %v", ar))
@@ -118,7 +118,7 @@ func (mm *MemoryManager) getPMAsLocked(ctx context.Context, vseg vmaIterator, ar
end = ar.End.RoundDown()
alignerr = syserror.EFAULT
}
- ar = usermem.AddrRange{ar.Start.RoundDown(), end}
+ ar = hostarch.AddrRange{ar.Start.RoundDown(), end}
pstart, pend, perr := mm.getPMAsInternalLocked(ctx, vseg, ar, at)
if pend.Start() <= ar.Start {
@@ -145,7 +145,7 @@ func (mm *MemoryManager) getPMAsLocked(ctx context.Context, vseg vmaIterator, ar
// * mm.activeMu must be locked for writing.
// * vmas must exist for all addresses in ars, and support accesses of type at
// (i.e. permission checks must have been performed against vmas).
-func (mm *MemoryManager) getVecPMAsLocked(ctx context.Context, ars usermem.AddrRangeSeq, at usermem.AccessType) (usermem.AddrRangeSeq, error) {
+func (mm *MemoryManager) getVecPMAsLocked(ctx context.Context, ars hostarch.AddrRangeSeq, at hostarch.AccessType) (hostarch.AddrRangeSeq, error) {
for arsit := ars; !arsit.IsEmpty(); arsit = arsit.Tail() {
ar := arsit.Head()
if ar.Length() == 0 {
@@ -164,7 +164,7 @@ func (mm *MemoryManager) getVecPMAsLocked(ctx context.Context, ars usermem.AddrR
end = ar.End.RoundDown()
alignerr = syserror.EFAULT
}
- ar = usermem.AddrRange{ar.Start.RoundDown(), end}
+ ar = hostarch.AddrRange{ar.Start.RoundDown(), end}
_, pend, perr := mm.getPMAsInternalLocked(ctx, mm.vmas.FindSegment(ar.Start), ar, at)
if perr != nil {
@@ -191,7 +191,7 @@ func (mm *MemoryManager) getVecPMAsLocked(ctx context.Context, ars usermem.AddrR
//
// getPMAsInternalLocked is an implementation helper for getPMAsLocked and
// getVecPMAsLocked; other clients should call one of those instead.
-func (mm *MemoryManager) getPMAsInternalLocked(ctx context.Context, vseg vmaIterator, ar usermem.AddrRange, at usermem.AccessType) (pmaIterator, pmaGapIterator, error) {
+func (mm *MemoryManager) getPMAsInternalLocked(ctx context.Context, vseg vmaIterator, ar hostarch.AddrRange, at hostarch.AccessType) (pmaIterator, pmaGapIterator, error) {
if checkInvariants {
if !ar.WellFormed() || ar.Length() == 0 || !ar.IsPageAligned() {
panic(fmt.Sprintf("invalid ar: %v", ar))
@@ -245,7 +245,7 @@ func (mm *MemoryManager) getPMAsInternalLocked(ctx context.Context, vseg vmaIter
pseg, pgap = mm.pmas.Insert(pgap, allocAR, pma{
file: mf,
off: fr.Start,
- translatePerms: usermem.AnyAccess,
+ translatePerms: hostarch.AnyAccess,
effectivePerms: vma.effectivePerms,
maxPerms: vma.maxPerms,
// Since we just allocated this memory and have the
@@ -335,7 +335,7 @@ func (mm *MemoryManager) getPMAsInternalLocked(ctx context.Context, vseg vmaIter
// Neither of these cases has enough spatial locality to
// benefit from copying nearby pages, so if the vma is
// executable, only copy the pages required.
- var copyAR usermem.AddrRange
+ var copyAR hostarch.AddrRange
if vseg.ValuePtr().effectivePerms.Execute {
copyAR = pseg.Range().Intersect(ar)
} else {
@@ -366,7 +366,7 @@ func (mm *MemoryManager) getPMAsInternalLocked(ctx context.Context, vseg vmaIter
// Replace the pma with a copy in the part of the address
// range where copying was successful. This doesn't change
// RSS.
- copyAR.End = copyAR.Start + usermem.Addr(fr.Length())
+ copyAR.End = copyAR.Start + hostarch.Addr(fr.Length())
if copyAR != pseg.Range() {
pseg = mm.pmas.Isolate(pseg, copyAR)
pstart = pmaIterator{} // iterators invalidated
@@ -380,7 +380,7 @@ func (mm *MemoryManager) getPMAsInternalLocked(ctx context.Context, vseg vmaIter
mf.IncRef(fr)
oldpma.file = mf
oldpma.off = fr.Start
- oldpma.translatePerms = usermem.AnyAccess
+ oldpma.translatePerms = hostarch.AnyAccess
oldpma.effectivePerms = vma.effectivePerms
oldpma.maxPerms = vma.maxPerms
oldpma.needCOW = false
@@ -499,14 +499,14 @@ const (
// privateAllocUnit may reduce page faults by allowing fewer, larger pmas
// to be mapped, but may result in larger amounts of wasted memory in the
// presence of fragmentation. privateAllocUnit must be a power-of-2
- // multiple of usermem.PageSize.
- privateAllocUnit = usermem.HugePageSize
+ // multiple of hostarch.PageSize.
+ privateAllocUnit = hostarch.HugePageSize
privateAllocMask = privateAllocUnit - 1
)
-func privateAligned(ar usermem.AddrRange) usermem.AddrRange {
- aligned := usermem.AddrRange{ar.Start &^ privateAllocMask, ar.End}
+func privateAligned(ar hostarch.AddrRange) hostarch.AddrRange {
+ aligned := hostarch.AddrRange{ar.Start &^ privateAllocMask, ar.End}
if end := (ar.End + privateAllocMask) &^ privateAllocMask; end >= ar.End {
aligned.End = end
}
@@ -548,7 +548,7 @@ func (mm *MemoryManager) isPMACopyOnWriteLocked(vseg vmaIterator, pseg pmaIterat
rseg := mm.privateRefs.refs.FindSegment(fr.Start)
if rseg.Ok() && rseg.Value() == 1 && fr.End <= rseg.End() {
pma.needCOW = false
- // pma.private => pma.translatePerms == usermem.AnyAccess
+ // pma.private => pma.translatePerms == hostarch.AnyAccess
vma := vseg.ValuePtr()
pma.effectivePerms = vma.effectivePerms
pma.maxPerms = vma.maxPerms
@@ -558,7 +558,7 @@ func (mm *MemoryManager) isPMACopyOnWriteLocked(vseg vmaIterator, pseg pmaIterat
}
// Invalidate implements memmap.MappingSpace.Invalidate.
-func (mm *MemoryManager) Invalidate(ar usermem.AddrRange, opts memmap.InvalidateOpts) {
+func (mm *MemoryManager) Invalidate(ar hostarch.AddrRange, opts memmap.InvalidateOpts) {
if checkInvariants {
if !ar.WellFormed() || ar.Length() == 0 || !ar.IsPageAligned() {
panic(fmt.Sprintf("invalid ar: %v", ar))
@@ -581,7 +581,7 @@ func (mm *MemoryManager) Invalidate(ar usermem.AddrRange, opts memmap.Invalidate
// * mm.activeMu must be locked for writing.
// * ar.Length() != 0.
// * ar must be page-aligned.
-func (mm *MemoryManager) invalidateLocked(ar usermem.AddrRange, invalidatePrivate, invalidateShared bool) {
+func (mm *MemoryManager) invalidateLocked(ar hostarch.AddrRange, invalidatePrivate, invalidateShared bool) {
if checkInvariants {
if !ar.WellFormed() || ar.Length() == 0 || !ar.IsPageAligned() {
panic(fmt.Sprintf("invalid ar: %v", ar))
@@ -627,7 +627,7 @@ func (mm *MemoryManager) invalidateLocked(ar usermem.AddrRange, invalidatePrivat
// Preconditions:
// * ar.Length() != 0.
// * ar must be page-aligned.
-func (mm *MemoryManager) Pin(ctx context.Context, ar usermem.AddrRange, at usermem.AccessType, ignorePermissions bool) ([]PinnedRange, error) {
+func (mm *MemoryManager) Pin(ctx context.Context, ar hostarch.AddrRange, at hostarch.AccessType, ignorePermissions bool) ([]PinnedRange, error) {
if checkInvariants {
if !ar.WellFormed() || ar.Length() == 0 || !ar.IsPageAligned() {
panic(fmt.Sprintf("invalid ar: %v", ar))
@@ -683,7 +683,7 @@ func (mm *MemoryManager) Pin(ctx context.Context, ar usermem.AddrRange, at userm
// PinnedRanges are returned by MemoryManager.Pin.
type PinnedRange struct {
// Source is the corresponding range of addresses.
- Source usermem.AddrRange
+ Source hostarch.AddrRange
// File is the mapped file.
File memmap.File
@@ -713,7 +713,7 @@ func Unpin(prs []PinnedRange) {
// * !oldAR.Overlaps(newAR).
// * mm.pmas.IsEmptyRange(newAR).
// * oldAR and newAR must be page-aligned.
-func (mm *MemoryManager) movePMAsLocked(oldAR, newAR usermem.AddrRange) {
+func (mm *MemoryManager) movePMAsLocked(oldAR, newAR hostarch.AddrRange) {
if checkInvariants {
if !oldAR.WellFormed() || oldAR.Length() == 0 || !oldAR.IsPageAligned() {
panic(fmt.Sprintf("invalid oldAR: %v", oldAR))
@@ -731,7 +731,7 @@ func (mm *MemoryManager) movePMAsLocked(oldAR, newAR usermem.AddrRange) {
}
type movedPMA struct {
- oldAR usermem.AddrRange
+ oldAR hostarch.AddrRange
pma pma
}
var movedPMAs []movedPMA
@@ -751,7 +751,7 @@ func (mm *MemoryManager) movePMAsLocked(oldAR, newAR usermem.AddrRange) {
pgap := mm.pmas.FindGap(newAR.Start)
for i := range movedPMAs {
mpma := &movedPMAs[i]
- pmaNewAR := usermem.AddrRange{mpma.oldAR.Start + off, mpma.oldAR.End + off}
+ pmaNewAR := hostarch.AddrRange{mpma.oldAR.Start + off, mpma.oldAR.End + off}
pgap = mm.pmas.Insert(pgap, pmaNewAR, mpma.pma).NextGap()
}
@@ -776,7 +776,7 @@ func (mm *MemoryManager) movePMAsLocked(oldAR, newAR usermem.AddrRange) {
//
// Postconditions: getPMAInternalMappingsLocked does not invalidate iterators
// into mm.pmas.
-func (mm *MemoryManager) getPMAInternalMappingsLocked(pseg pmaIterator, ar usermem.AddrRange) (pmaGapIterator, error) {
+func (mm *MemoryManager) getPMAInternalMappingsLocked(pseg pmaIterator, ar hostarch.AddrRange) (pmaGapIterator, error) {
if checkInvariants {
if !ar.WellFormed() || ar.Length() == 0 {
panic(fmt.Sprintf("invalid ar: %v", ar))
@@ -808,7 +808,7 @@ func (mm *MemoryManager) getPMAInternalMappingsLocked(pseg pmaIterator, ar userm
//
// Postconditions: getVecPMAInternalMappingsLocked does not invalidate iterators
// into mm.pmas.
-func (mm *MemoryManager) getVecPMAInternalMappingsLocked(ars usermem.AddrRangeSeq) (usermem.AddrRangeSeq, error) {
+func (mm *MemoryManager) getVecPMAInternalMappingsLocked(ars hostarch.AddrRangeSeq) (hostarch.AddrRangeSeq, error) {
for arsit := ars; !arsit.IsEmpty(); arsit = arsit.Tail() {
ar := arsit.Head()
if ar.Length() == 0 {
@@ -829,7 +829,7 @@ func (mm *MemoryManager) getVecPMAInternalMappingsLocked(ars usermem.AddrRangeSe
// in ar.
// * ar.Length() != 0.
// * pseg.Range().Contains(ar.Start).
-func (mm *MemoryManager) internalMappingsLocked(pseg pmaIterator, ar usermem.AddrRange) safemem.BlockSeq {
+func (mm *MemoryManager) internalMappingsLocked(pseg pmaIterator, ar hostarch.AddrRange) safemem.BlockSeq {
if checkInvariants {
if !ar.WellFormed() || ar.Length() == 0 {
panic(fmt.Sprintf("invalid ar: %v", ar))
@@ -866,7 +866,7 @@ func (mm *MemoryManager) internalMappingsLocked(pseg pmaIterator, ar usermem.Add
// * mm.activeMu must be locked.
// * Internal mappings must have been previously established for all addresses
// in ars.
-func (mm *MemoryManager) vecInternalMappingsLocked(ars usermem.AddrRangeSeq) safemem.BlockSeq {
+func (mm *MemoryManager) vecInternalMappingsLocked(ars hostarch.AddrRangeSeq) safemem.BlockSeq {
var ims []safemem.Block
for ; !ars.IsEmpty(); ars = ars.Tail() {
ar := ars.Head()
@@ -931,7 +931,7 @@ func (mm *MemoryManager) decPrivateRef(fr memmap.FileRange) {
// MemoryManager to reflect the insertion of a pma at ar.
//
// Preconditions: mm.activeMu must be locked for writing.
-func (mm *MemoryManager) addRSSLocked(ar usermem.AddrRange) {
+func (mm *MemoryManager) addRSSLocked(ar hostarch.AddrRange) {
mm.curRSS += uint64(ar.Length())
if mm.curRSS > mm.maxRSS {
mm.maxRSS = mm.curRSS
@@ -942,19 +942,19 @@ func (mm *MemoryManager) addRSSLocked(ar usermem.AddrRange) {
// reflect the removal of a pma at ar.
//
// Preconditions: mm.activeMu must be locked for writing.
-func (mm *MemoryManager) removeRSSLocked(ar usermem.AddrRange) {
+func (mm *MemoryManager) removeRSSLocked(ar hostarch.AddrRange) {
mm.curRSS -= uint64(ar.Length())
}
// pmaSetFunctions implements segment.Functions for pmaSet.
type pmaSetFunctions struct{}
-func (pmaSetFunctions) MinKey() usermem.Addr {
+func (pmaSetFunctions) MinKey() hostarch.Addr {
return 0
}
-func (pmaSetFunctions) MaxKey() usermem.Addr {
- return ^usermem.Addr(0)
+func (pmaSetFunctions) MaxKey() hostarch.Addr {
+ return ^hostarch.Addr(0)
}
func (pmaSetFunctions) ClearValue(pma *pma) {
@@ -962,7 +962,7 @@ func (pmaSetFunctions) ClearValue(pma *pma) {
pma.internalMappings = safemem.BlockSeq{}
}
-func (pmaSetFunctions) Merge(ar1 usermem.AddrRange, pma1 pma, ar2 usermem.AddrRange, pma2 pma) (pma, bool) {
+func (pmaSetFunctions) Merge(ar1 hostarch.AddrRange, pma1 pma, ar2 hostarch.AddrRange, pma2 pma) (pma, bool) {
if pma1.file != pma2.file ||
pma1.off+uint64(ar1.Length()) != pma2.off ||
pma1.translatePerms != pma2.translatePerms ||
@@ -980,7 +980,7 @@ func (pmaSetFunctions) Merge(ar1 usermem.AddrRange, pma1 pma, ar2 usermem.AddrRa
return pma1, true
}
-func (pmaSetFunctions) Split(ar usermem.AddrRange, p pma, split usermem.Addr) (pma, pma) {
+func (pmaSetFunctions) Split(ar hostarch.AddrRange, p pma, split hostarch.Addr) (pma, pma) {
newlen1 := uint64(split - ar.Start)
p2 := p
p2.off += newlen1
@@ -997,7 +997,7 @@ func (pmaSetFunctions) Split(ar usermem.AddrRange, p pma, split usermem.Addr) (p
// Preconditions:
// * mm.activeMu must be locked.
// * addr <= pgap.Start().
-func (mm *MemoryManager) findOrSeekPrevUpperBoundPMA(addr usermem.Addr, pgap pmaGapIterator) pmaIterator {
+func (mm *MemoryManager) findOrSeekPrevUpperBoundPMA(addr hostarch.Addr, pgap pmaGapIterator) pmaIterator {
if checkInvariants {
if !pgap.Ok() {
panic("terminal pma iterator")
@@ -1045,7 +1045,7 @@ func (pseg pmaIterator) fileRange() memmap.FileRange {
// Preconditions:
// * pseg.Range().IsSupersetOf(ar).
// * ar.Length != 0.
-func (pseg pmaIterator) fileRangeOf(ar usermem.AddrRange) memmap.FileRange {
+func (pseg pmaIterator) fileRangeOf(ar hostarch.AddrRange) memmap.FileRange {
if checkInvariants {
if !pseg.Ok() {
panic("terminal pma iterator")
diff --git a/pkg/sentry/mm/procfs.go b/pkg/sentry/mm/procfs.go
index 73bfbea49..f1440e884 100644
--- a/pkg/sentry/mm/procfs.go
+++ b/pkg/sentry/mm/procfs.go
@@ -19,9 +19,9 @@ import (
"fmt"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile"
"gvisor.dev/gvisor/pkg/sentry/memmap"
- "gvisor.dev/gvisor/pkg/usermem"
)
const (
@@ -29,7 +29,7 @@ const (
// include/linux/kdev_t.h:MINORBITS
devMinorBits = 20
- vsyscallEnd = usermem.Addr(0xffffffffff601000)
+ vsyscallEnd = hostarch.Addr(0xffffffffff601000)
vsyscallMapsEntry = "ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n"
vsyscallSmapsEntry = vsyscallMapsEntry +
"Size: 4 kB\n" +
@@ -62,7 +62,7 @@ func (mm *MemoryManager) NeedsUpdate(generation int64) bool {
func (mm *MemoryManager) ReadMapsDataInto(ctx context.Context, buf *bytes.Buffer) {
mm.mappingMu.RLock()
defer mm.mappingMu.RUnlock()
- var start usermem.Addr
+ var start hostarch.Addr
for vseg := mm.vmas.LowerBoundSegment(start); vseg.Ok(); vseg = vseg.NextSegment() {
mm.appendVMAMapsEntryLocked(ctx, vseg, buf)
@@ -88,9 +88,9 @@ func (mm *MemoryManager) ReadMapsSeqFileData(ctx context.Context, handle seqfile
mm.mappingMu.RLock()
defer mm.mappingMu.RUnlock()
var data []seqfile.SeqData
- var start usermem.Addr
+ var start hostarch.Addr
if handle != nil {
- start = *handle.(*usermem.Addr)
+ start = *handle.(*hostarch.Addr)
}
for vseg := mm.vmas.LowerBoundSegment(start); vseg.Ok(); vseg = vseg.NextSegment() {
vmaAddr := vseg.End()
@@ -177,7 +177,7 @@ func (mm *MemoryManager) appendVMAMapsEntryLocked(ctx context.Context, vseg vmaI
func (mm *MemoryManager) ReadSmapsDataInto(ctx context.Context, buf *bytes.Buffer) {
mm.mappingMu.RLock()
defer mm.mappingMu.RUnlock()
- var start usermem.Addr
+ var start hostarch.Addr
for vseg := mm.vmas.LowerBoundSegment(start); vseg.Ok(); vseg = vseg.NextSegment() {
mm.vmaSmapsEntryIntoLocked(ctx, vseg, buf)
@@ -196,9 +196,9 @@ func (mm *MemoryManager) ReadSmapsSeqFileData(ctx context.Context, handle seqfil
mm.mappingMu.RLock()
defer mm.mappingMu.RUnlock()
var data []seqfile.SeqData
- var start usermem.Addr
+ var start hostarch.Addr
if handle != nil {
- start = *handle.(*usermem.Addr)
+ start = *handle.(*hostarch.Addr)
}
for vseg := mm.vmas.LowerBoundSegment(start); vseg.Ok(); vseg = vseg.NextSegment() {
vmaAddr := vseg.End()
@@ -279,8 +279,8 @@ func (mm *MemoryManager) vmaSmapsEntryIntoLocked(ctx context.Context, vseg vmaIt
// Swap is not implemented.
fmt.Fprintf(b, "Swap: %8d kB\n", 0)
fmt.Fprintf(b, "SwapPss: %8d kB\n", 0)
- fmt.Fprintf(b, "KernelPageSize: %8d kB\n", usermem.PageSize/1024)
- fmt.Fprintf(b, "MMUPageSize: %8d kB\n", usermem.PageSize/1024)
+ fmt.Fprintf(b, "KernelPageSize: %8d kB\n", hostarch.PageSize/1024)
+ fmt.Fprintf(b, "MMUPageSize: %8d kB\n", hostarch.PageSize/1024)
locked := rss
if vma.mlockMode == memmap.MLockNone {
locked = 0
diff --git a/pkg/sentry/mm/shm.go b/pkg/sentry/mm/shm.go
index 6432731d4..3130be80c 100644
--- a/pkg/sentry/mm/shm.go
+++ b/pkg/sentry/mm/shm.go
@@ -16,13 +16,13 @@ package mm
import (
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/kernel/shm"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
// DetachShm unmaps a sysv shared memory segment.
-func (mm *MemoryManager) DetachShm(ctx context.Context, addr usermem.Addr) error {
+func (mm *MemoryManager) DetachShm(ctx context.Context, addr hostarch.Addr) error {
if addr != addr.RoundDown() {
// "... shmaddr is not aligned on a page boundary." - man shmdt(2)
return syserror.EINVAL
@@ -52,7 +52,7 @@ func (mm *MemoryManager) DetachShm(ctx context.Context, addr usermem.Addr) error
}
// Remove all vmas that could have been created by the same attach.
- end := addr + usermem.Addr(detached.EffectiveSize())
+ end := addr + hostarch.Addr(detached.EffectiveSize())
for vseg.Ok() && vseg.End() <= end {
vma := vseg.ValuePtr()
if vma.mappable == detached && uint64(vseg.Start()-addr) == vma.off {
diff --git a/pkg/sentry/mm/special_mappable.go b/pkg/sentry/mm/special_mappable.go
index 48d8b6a2b..e748b7ff8 100644
--- a/pkg/sentry/mm/special_mappable.go
+++ b/pkg/sentry/mm/special_mappable.go
@@ -16,11 +16,11 @@ package mm
import (
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/pgalloc"
"gvisor.dev/gvisor/pkg/sentry/usage"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
// SpecialMappable implements memmap.MappingIdentity and memmap.Mappable with
@@ -77,21 +77,21 @@ func (m *SpecialMappable) Msync(ctx context.Context, mr memmap.MappableRange) er
}
// AddMapping implements memmap.Mappable.AddMapping.
-func (*SpecialMappable) AddMapping(context.Context, memmap.MappingSpace, usermem.AddrRange, uint64, bool) error {
+func (*SpecialMappable) AddMapping(context.Context, memmap.MappingSpace, hostarch.AddrRange, uint64, bool) error {
return nil
}
// RemoveMapping implements memmap.Mappable.RemoveMapping.
-func (*SpecialMappable) RemoveMapping(context.Context, memmap.MappingSpace, usermem.AddrRange, uint64, bool) {
+func (*SpecialMappable) RemoveMapping(context.Context, memmap.MappingSpace, hostarch.AddrRange, uint64, bool) {
}
// CopyMapping implements memmap.Mappable.CopyMapping.
-func (*SpecialMappable) CopyMapping(context.Context, memmap.MappingSpace, usermem.AddrRange, usermem.AddrRange, uint64, bool) error {
+func (*SpecialMappable) CopyMapping(context.Context, memmap.MappingSpace, hostarch.AddrRange, hostarch.AddrRange, uint64, bool) error {
return nil
}
// Translate implements memmap.Mappable.Translate.
-func (m *SpecialMappable) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) {
+func (m *SpecialMappable) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) {
var err error
if required.End > m.fr.Length() {
err = &memmap.BusError{syserror.EFAULT}
@@ -102,7 +102,7 @@ func (m *SpecialMappable) Translate(ctx context.Context, required, optional memm
Source: source,
File: m.mfp.MemoryFile(),
Offset: m.fr.Start + source.Start,
- Perms: usermem.AnyAccess,
+ Perms: hostarch.AnyAccess,
},
}, err
}
@@ -146,7 +146,7 @@ func NewSharedAnonMappable(length uint64, mfp pgalloc.MemoryFileProvider) (*Spec
if length == 0 {
return nil, syserror.EINVAL
}
- alignedLen, ok := usermem.Addr(length).RoundUp()
+ alignedLen, ok := hostarch.Addr(length).RoundUp()
if !ok {
return nil, syserror.EINVAL
}
diff --git a/pkg/sentry/mm/syscalls.go b/pkg/sentry/mm/syscalls.go
index 69e37330b..7ad6b7c21 100644
--- a/pkg/sentry/mm/syscalls.go
+++ b/pkg/sentry/mm/syscalls.go
@@ -21,20 +21,20 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/kernel/futex"
"gvisor.dev/gvisor/pkg/sentry/limits"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
// HandleUserFault handles an application page fault. sp is the faulting
// application thread's stack pointer.
//
// Preconditions: mm.as != nil.
-func (mm *MemoryManager) HandleUserFault(ctx context.Context, addr usermem.Addr, at usermem.AccessType, sp usermem.Addr) error {
- ar, ok := addr.RoundDown().ToRange(usermem.PageSize)
+func (mm *MemoryManager) HandleUserFault(ctx context.Context, addr hostarch.Addr, at hostarch.AccessType, sp hostarch.Addr) error {
+ ar, ok := addr.RoundDown().ToRange(hostarch.PageSize)
if !ok {
return syserror.EFAULT
}
@@ -72,11 +72,11 @@ func (mm *MemoryManager) HandleUserFault(ctx context.Context, addr usermem.Addr,
}
// MMap establishes a memory mapping.
-func (mm *MemoryManager) MMap(ctx context.Context, opts memmap.MMapOpts) (usermem.Addr, error) {
+func (mm *MemoryManager) MMap(ctx context.Context, opts memmap.MMapOpts) (hostarch.Addr, error) {
if opts.Length == 0 {
return 0, syserror.EINVAL
}
- length, ok := usermem.Addr(opts.Length).RoundUp()
+ length, ok := hostarch.Addr(opts.Length).RoundUp()
if !ok {
return 0, syserror.ENOMEM
}
@@ -84,7 +84,7 @@ func (mm *MemoryManager) MMap(ctx context.Context, opts memmap.MMapOpts) (userme
if opts.Mappable != nil {
// Offset must be aligned.
- if usermem.Addr(opts.Offset).RoundDown() != usermem.Addr(opts.Offset) {
+ if hostarch.Addr(opts.Offset).RoundDown() != hostarch.Addr(opts.Offset) {
return 0, syserror.EINVAL
}
// Offset + length must not overflow.
@@ -157,7 +157,7 @@ func (mm *MemoryManager) MMap(ctx context.Context, opts memmap.MMapOpts) (userme
// Preconditions:
// * mm.mappingMu must be locked.
// * vseg.Range().IsSupersetOf(ar).
-func (mm *MemoryManager) populateVMA(ctx context.Context, vseg vmaIterator, ar usermem.AddrRange, precommit bool) {
+func (mm *MemoryManager) populateVMA(ctx context.Context, vseg vmaIterator, ar hostarch.AddrRange, precommit bool) {
if !vseg.ValuePtr().effectivePerms.Any() {
// Linux doesn't populate inaccessible pages. See
// mm/gup.c:populate_vma_page_range.
@@ -175,7 +175,7 @@ func (mm *MemoryManager) populateVMA(ctx context.Context, vseg vmaIterator, ar u
}
// Ensure that we have usable pmas.
- pseg, _, err := mm.getPMAsLocked(ctx, vseg, ar, usermem.NoAccess)
+ pseg, _, err := mm.getPMAsLocked(ctx, vseg, ar, hostarch.NoAccess)
if err != nil {
// mm/util.c:vm_mmap_pgoff() ignores the error, if any, from
// mm/gup.c:mm_populate(). If it matters, we'll get it again when
@@ -203,7 +203,7 @@ func (mm *MemoryManager) populateVMA(ctx context.Context, vseg vmaIterator, ar u
// * vseg.Range().IsSupersetOf(ar).
//
// Postconditions: mm.mappingMu will be unlocked.
-func (mm *MemoryManager) populateVMAAndUnlock(ctx context.Context, vseg vmaIterator, ar usermem.AddrRange, precommit bool) {
+func (mm *MemoryManager) populateVMAAndUnlock(ctx context.Context, vseg vmaIterator, ar hostarch.AddrRange, precommit bool) {
// See populateVMA above for commentary.
if !vseg.ValuePtr().effectivePerms.Any() {
mm.mappingMu.Unlock()
@@ -221,7 +221,7 @@ func (mm *MemoryManager) populateVMAAndUnlock(ctx context.Context, vseg vmaItera
// mm.mappingMu doesn't need to be write-locked for getPMAsLocked, and it
// isn't needed at all for mapASLocked.
mm.mappingMu.DowngradeLock()
- pseg, _, err := mm.getPMAsLocked(ctx, vseg, ar, usermem.NoAccess)
+ pseg, _, err := mm.getPMAsLocked(ctx, vseg, ar, hostarch.NoAccess)
mm.mappingMu.RUnlock()
if err != nil {
mm.activeMu.Unlock()
@@ -234,7 +234,7 @@ func (mm *MemoryManager) populateVMAAndUnlock(ctx context.Context, vseg vmaItera
}
// MapStack allocates the initial process stack.
-func (mm *MemoryManager) MapStack(ctx context.Context) (usermem.AddrRange, error) {
+func (mm *MemoryManager) MapStack(ctx context.Context) (hostarch.AddrRange, error) {
// maxStackSize is the maximum supported process stack size in bytes.
//
// This limit exists because stack growing isn't implemented, so the entire
@@ -242,7 +242,7 @@ func (mm *MemoryManager) MapStack(ctx context.Context) (usermem.AddrRange, error
const maxStackSize = 128 << 20
stackSize := limits.FromContext(ctx).Get(limits.Stack)
- r, ok := usermem.Addr(stackSize.Cur).RoundUp()
+ r, ok := hostarch.Addr(stackSize.Cur).RoundUp()
sz := uint64(r)
if !ok {
// RLIM_INFINITY rounds up to 0.
@@ -251,16 +251,16 @@ func (mm *MemoryManager) MapStack(ctx context.Context) (usermem.AddrRange, error
ctx.Warningf("Capping stack size from RLIMIT_STACK of %v down to %v.", sz, maxStackSize)
sz = maxStackSize
} else if sz == 0 {
- return usermem.AddrRange{}, syserror.ENOMEM
+ return hostarch.AddrRange{}, syserror.ENOMEM
}
- szaddr := usermem.Addr(sz)
+ szaddr := hostarch.Addr(sz)
ctx.Debugf("Allocating stack with size of %v bytes", sz)
// Determine the stack's desired location. Unlike Linux, address
// randomization can't be disabled.
- stackEnd := mm.layout.MaxAddr - usermem.Addr(mrand.Int63n(int64(mm.layout.MaxStackRand))).RoundDown()
+ stackEnd := mm.layout.MaxAddr - hostarch.Addr(mrand.Int63n(int64(mm.layout.MaxStackRand))).RoundDown()
if stackEnd < szaddr {
- return usermem.AddrRange{}, syserror.ENOMEM
+ return hostarch.AddrRange{}, syserror.ENOMEM
}
stackStart := stackEnd - szaddr
mm.mappingMu.Lock()
@@ -268,8 +268,8 @@ func (mm *MemoryManager) MapStack(ctx context.Context) (usermem.AddrRange, error
_, ar, err := mm.createVMALocked(ctx, memmap.MMapOpts{
Length: sz,
Addr: stackStart,
- Perms: usermem.ReadWrite,
- MaxPerms: usermem.AnyAccess,
+ Perms: hostarch.ReadWrite,
+ MaxPerms: hostarch.AnyAccess,
Private: true,
GrowsDown: true,
MLockMode: mm.defMLockMode,
@@ -279,14 +279,14 @@ func (mm *MemoryManager) MapStack(ctx context.Context) (usermem.AddrRange, error
}
// MUnmap implements the semantics of Linux's munmap(2).
-func (mm *MemoryManager) MUnmap(ctx context.Context, addr usermem.Addr, length uint64) error {
+func (mm *MemoryManager) MUnmap(ctx context.Context, addr hostarch.Addr, length uint64) error {
if addr != addr.RoundDown() {
return syserror.EINVAL
}
if length == 0 {
return syserror.EINVAL
}
- la, ok := usermem.Addr(length).RoundUp()
+ la, ok := hostarch.Addr(length).RoundUp()
if !ok {
return syserror.EINVAL
}
@@ -308,7 +308,7 @@ type MRemapOpts struct {
// NewAddr is the new address for the remapping. NewAddr is ignored unless
// Move is MMRemapMustMove.
- NewAddr usermem.Addr
+ NewAddr hostarch.Addr
}
// MRemapMoveMode controls MRemap's moving behavior.
@@ -328,7 +328,7 @@ const (
)
// MRemap implements the semantics of Linux's mremap(2).
-func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr usermem.Addr, oldSize uint64, newSize uint64, opts MRemapOpts) (usermem.Addr, error) {
+func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr hostarch.Addr, oldSize uint64, newSize uint64, opts MRemapOpts) (hostarch.Addr, error) {
// "Note that old_address has to be page aligned." - mremap(2)
if oldAddr.RoundDown() != oldAddr {
return 0, syserror.EINVAL
@@ -336,9 +336,9 @@ func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr usermem.Addr, oldSi
// Linux treats an old_size that rounds up to 0 as 0, which is otherwise a
// valid size. However, new_size can't be 0 after rounding.
- oldSizeAddr, _ := usermem.Addr(oldSize).RoundUp()
+ oldSizeAddr, _ := hostarch.Addr(oldSize).RoundUp()
oldSize = uint64(oldSizeAddr)
- newSizeAddr, ok := usermem.Addr(newSize).RoundUp()
+ newSizeAddr, ok := hostarch.Addr(newSize).RoundUp()
if !ok || newSizeAddr == 0 {
return 0, syserror.EINVAL
}
@@ -392,8 +392,8 @@ func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr usermem.Addr, oldSi
if newSize < oldSize {
// If oldAddr+oldSize didn't overflow, oldAddr+newSize can't
// either.
- newEnd := oldAddr + usermem.Addr(newSize)
- mm.unmapLocked(ctx, usermem.AddrRange{newEnd, oldEnd})
+ newEnd := oldAddr + hostarch.Addr(newSize)
+ mm.unmapLocked(ctx, hostarch.AddrRange{newEnd, oldEnd})
}
return oldAddr, nil
}
@@ -438,7 +438,7 @@ func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr usermem.Addr, oldSi
}
// Find a location for the new mapping.
- var newAR usermem.AddrRange
+ var newAR hostarch.AddrRange
switch opts.Move {
case MRemapMayMove:
newAddr, err := mm.findAvailableLocked(newSize, findAvailableOpts{})
@@ -457,7 +457,7 @@ func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr usermem.Addr, oldSi
if !ok {
return 0, syserror.EINVAL
}
- if (usermem.AddrRange{oldAddr, oldEnd}).Overlaps(newAR) {
+ if (hostarch.AddrRange{oldAddr, oldEnd}).Overlaps(newAR) {
return 0, syserror.EINVAL
}
@@ -479,8 +479,8 @@ func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr usermem.Addr, oldSi
// correct: compare Linux's mm/mremap.c:mremap_to() => do_munmap(),
// vma_to_resize().
if newSize < oldSize {
- oldNewEnd := oldAddr + usermem.Addr(newSize)
- mm.unmapLocked(ctx, usermem.AddrRange{oldNewEnd, oldEnd})
+ oldNewEnd := oldAddr + hostarch.Addr(newSize)
+ mm.unmapLocked(ctx, hostarch.AddrRange{oldNewEnd, oldEnd})
oldEnd = oldNewEnd
}
@@ -488,7 +488,7 @@ func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr usermem.Addr, oldSi
vseg = mm.vmas.FindSegment(oldAddr)
}
- oldAR := usermem.AddrRange{oldAddr, oldEnd}
+ oldAR := hostarch.AddrRange{oldAddr, oldEnd}
// Check that oldEnd maps to the same vma as oldAddr.
if vseg.End() < oldEnd {
@@ -588,14 +588,14 @@ func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr usermem.Addr, oldSi
}
// MProtect implements the semantics of Linux's mprotect(2).
-func (mm *MemoryManager) MProtect(addr usermem.Addr, length uint64, realPerms usermem.AccessType, growsDown bool) error {
+func (mm *MemoryManager) MProtect(addr hostarch.Addr, length uint64, realPerms hostarch.AccessType, growsDown bool) error {
if addr.RoundDown() != addr {
return syserror.EINVAL
}
if length == 0 {
return nil
}
- rlength, ok := usermem.Addr(length).RoundUp()
+ rlength, ok := hostarch.Addr(length).RoundUp()
if !ok {
return syserror.ENOMEM
}
@@ -692,19 +692,19 @@ func (mm *MemoryManager) MProtect(addr usermem.Addr, length uint64, realPerms us
}
// BrkSetup sets mm's brk address to addr and its brk size to 0.
-func (mm *MemoryManager) BrkSetup(ctx context.Context, addr usermem.Addr) {
+func (mm *MemoryManager) BrkSetup(ctx context.Context, addr hostarch.Addr) {
mm.mappingMu.Lock()
defer mm.mappingMu.Unlock()
// Unmap the existing brk.
if mm.brk.Length() != 0 {
mm.unmapLocked(ctx, mm.brk)
}
- mm.brk = usermem.AddrRange{addr, addr}
+ mm.brk = hostarch.AddrRange{addr, addr}
}
// Brk implements the semantics of Linux's brk(2), except that it returns an
// error on failure.
-func (mm *MemoryManager) Brk(ctx context.Context, addr usermem.Addr) (usermem.Addr, error) {
+func (mm *MemoryManager) Brk(ctx context.Context, addr hostarch.Addr) (hostarch.Addr, error) {
mm.mappingMu.Lock()
// Can't defer mm.mappingMu.Unlock(); see below.
@@ -741,8 +741,8 @@ func (mm *MemoryManager) Brk(ctx context.Context, addr usermem.Addr) (usermem.Ad
Fixed: true,
// Compare Linux's
// arch/x86/include/asm/page_types.h:VM_DATA_DEFAULT_FLAGS.
- Perms: usermem.ReadWrite,
- MaxPerms: usermem.AnyAccess,
+ Perms: hostarch.ReadWrite,
+ MaxPerms: hostarch.AnyAccess,
Private: true,
// Linux: mm/mmap.c:sys_brk() => do_brk_flags() includes
// mm->def_flags.
@@ -762,7 +762,7 @@ func (mm *MemoryManager) Brk(ctx context.Context, addr usermem.Addr) (usermem.Ad
}
case newbrkpg < oldbrkpg:
- mm.unmapLocked(ctx, usermem.AddrRange{newbrkpg, oldbrkpg})
+ mm.unmapLocked(ctx, hostarch.AddrRange{newbrkpg, oldbrkpg})
fallthrough
default:
@@ -775,9 +775,9 @@ func (mm *MemoryManager) Brk(ctx context.Context, addr usermem.Addr) (usermem.Ad
// MLock implements the semantics of Linux's mlock()/mlock2()/munlock(),
// depending on mode.
-func (mm *MemoryManager) MLock(ctx context.Context, addr usermem.Addr, length uint64, mode memmap.MLockMode) error {
+func (mm *MemoryManager) MLock(ctx context.Context, addr hostarch.Addr, length uint64, mode memmap.MLockMode) error {
// Linux allows this to overflow.
- la, _ := usermem.Addr(length + addr.PageOffset()).RoundUp()
+ la, _ := hostarch.Addr(length + addr.PageOffset()).RoundUp()
ar, ok := addr.RoundDown().ToRange(uint64(la))
if !ok {
return syserror.EINVAL
@@ -850,7 +850,7 @@ func (mm *MemoryManager) MLock(ctx context.Context, addr usermem.Addr, length ui
mm.mappingMu.RUnlock()
return syserror.ENOMEM
}
- _, _, err := mm.getPMAsLocked(ctx, vseg, vseg.Range().Intersect(ar), usermem.NoAccess)
+ _, _, err := mm.getPMAsLocked(ctx, vseg, vseg.Range().Intersect(ar), hostarch.NoAccess)
if err != nil {
mm.activeMu.Unlock()
mm.mappingMu.RUnlock()
@@ -945,7 +945,7 @@ func (mm *MemoryManager) MLockAll(ctx context.Context, opts MLockAllOpts) error
mm.mappingMu.DowngradeLock()
for vseg := mm.vmas.FirstSegment(); vseg.Ok(); vseg = vseg.NextSegment() {
if vseg.ValuePtr().effectivePerms.Any() {
- mm.getPMAsLocked(ctx, vseg, vseg.Range(), usermem.NoAccess)
+ mm.getPMAsLocked(ctx, vseg, vseg.Range(), hostarch.NoAccess)
}
}
@@ -965,7 +965,7 @@ func (mm *MemoryManager) MLockAll(ctx context.Context, opts MLockAllOpts) error
}
// NumaPolicy implements the semantics of Linux's get_mempolicy(MPOL_F_ADDR).
-func (mm *MemoryManager) NumaPolicy(addr usermem.Addr) (linux.NumaPolicy, uint64, error) {
+func (mm *MemoryManager) NumaPolicy(addr hostarch.Addr) (linux.NumaPolicy, uint64, error) {
mm.mappingMu.RLock()
defer mm.mappingMu.RUnlock()
vseg := mm.vmas.FindSegment(addr)
@@ -977,12 +977,12 @@ func (mm *MemoryManager) NumaPolicy(addr usermem.Addr) (linux.NumaPolicy, uint64
}
// SetNumaPolicy implements the semantics of Linux's mbind().
-func (mm *MemoryManager) SetNumaPolicy(addr usermem.Addr, length uint64, policy linux.NumaPolicy, nodemask uint64) error {
+func (mm *MemoryManager) SetNumaPolicy(addr hostarch.Addr, length uint64, policy linux.NumaPolicy, nodemask uint64) error {
if !addr.IsPageAligned() {
return syserror.EINVAL
}
// Linux allows this to overflow.
- la, _ := usermem.Addr(length).RoundUp()
+ la, _ := hostarch.Addr(length).RoundUp()
ar, ok := addr.ToRange(uint64(la))
if !ok {
return syserror.EINVAL
@@ -1018,7 +1018,7 @@ func (mm *MemoryManager) SetNumaPolicy(addr usermem.Addr, length uint64, policy
}
// SetDontFork implements the semantics of madvise MADV_DONTFORK.
-func (mm *MemoryManager) SetDontFork(addr usermem.Addr, length uint64, dontfork bool) error {
+func (mm *MemoryManager) SetDontFork(addr hostarch.Addr, length uint64, dontfork bool) error {
ar, ok := addr.ToRange(length)
if !ok {
return syserror.EINVAL
@@ -1044,7 +1044,7 @@ func (mm *MemoryManager) SetDontFork(addr usermem.Addr, length uint64, dontfork
}
// Decommit implements the semantics of Linux's madvise(MADV_DONTNEED).
-func (mm *MemoryManager) Decommit(addr usermem.Addr, length uint64) error {
+func (mm *MemoryManager) Decommit(addr hostarch.Addr, length uint64) error {
ar, ok := addr.ToRange(length)
if !ok {
return syserror.EINVAL
@@ -1112,14 +1112,14 @@ type MSyncOpts struct {
}
// MSync implements the semantics of Linux's msync().
-func (mm *MemoryManager) MSync(ctx context.Context, addr usermem.Addr, length uint64, opts MSyncOpts) error {
+func (mm *MemoryManager) MSync(ctx context.Context, addr hostarch.Addr, length uint64, opts MSyncOpts) error {
if addr != addr.RoundDown() {
return syserror.EINVAL
}
if length == 0 {
return nil
}
- la, ok := usermem.Addr(length).RoundUp()
+ la, ok := hostarch.Addr(length).RoundUp()
if !ok {
return syserror.ENOMEM
}
@@ -1188,7 +1188,7 @@ func (mm *MemoryManager) MSync(ctx context.Context, addr usermem.Addr, length ui
}
// GetSharedFutexKey is used by kernel.Task.GetSharedKey.
-func (mm *MemoryManager) GetSharedFutexKey(ctx context.Context, addr usermem.Addr) (futex.Key, error) {
+func (mm *MemoryManager) GetSharedFutexKey(ctx context.Context, addr hostarch.Addr) (futex.Key, error) {
ar, ok := addr.ToRange(4) // sizeof(int32).
if !ok {
return futex.Key{}, syserror.EFAULT
@@ -1196,7 +1196,7 @@ func (mm *MemoryManager) GetSharedFutexKey(ctx context.Context, addr usermem.Add
mm.mappingMu.RLock()
defer mm.mappingMu.RUnlock()
- vseg, _, err := mm.getVMAsLocked(ctx, ar, usermem.Read, false)
+ vseg, _, err := mm.getVMAsLocked(ctx, ar, hostarch.Read, false)
if err != nil {
return futex.Key{}, err
}
@@ -1230,7 +1230,7 @@ func (mm *MemoryManager) VirtualMemorySize() uint64 {
// VirtualMemorySizeRange returns the combined length in bytes of all mappings
// in ar in mm.
-func (mm *MemoryManager) VirtualMemorySizeRange(ar usermem.AddrRange) uint64 {
+func (mm *MemoryManager) VirtualMemorySizeRange(ar hostarch.AddrRange) uint64 {
mm.mappingMu.RLock()
defer mm.mappingMu.RUnlock()
return uint64(mm.vmas.SpanRange(ar))
diff --git a/pkg/sentry/mm/vma.go b/pkg/sentry/mm/vma.go
index b8df72813..0d019e41d 100644
--- a/pkg/sentry/mm/vma.go
+++ b/pkg/sentry/mm/vma.go
@@ -19,18 +19,18 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/limits"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
// Preconditions:
// * mm.mappingMu must be locked for writing.
// * opts must be valid as defined by the checks in MMap.
-func (mm *MemoryManager) createVMALocked(ctx context.Context, opts memmap.MMapOpts) (vmaIterator, usermem.AddrRange, error) {
+func (mm *MemoryManager) createVMALocked(ctx context.Context, opts memmap.MMapOpts) (vmaIterator, hostarch.AddrRange, error) {
if opts.MaxPerms != opts.MaxPerms.Effective() {
panic(fmt.Sprintf("Non-effective MaxPerms %s cannot be enforced", opts.MaxPerms))
}
@@ -47,7 +47,7 @@ func (mm *MemoryManager) createVMALocked(ctx context.Context, opts memmap.MMapOp
if opts.Force && opts.Unmap && opts.Fixed {
addr = opts.Addr
} else {
- return vmaIterator{}, usermem.AddrRange{}, err
+ return vmaIterator{}, hostarch.AddrRange{}, err
}
}
ar, _ := addr.ToRange(opts.Length)
@@ -58,7 +58,7 @@ func (mm *MemoryManager) createVMALocked(ctx context.Context, opts memmap.MMapOp
newUsageAS -= uint64(mm.vmas.SpanRange(ar))
}
if limitAS := limits.FromContext(ctx).Get(limits.AS).Cur; newUsageAS > limitAS {
- return vmaIterator{}, usermem.AddrRange{}, syserror.ENOMEM
+ return vmaIterator{}, hostarch.AddrRange{}, syserror.ENOMEM
}
if opts.MLockMode != memmap.MLockNone {
@@ -66,14 +66,14 @@ func (mm *MemoryManager) createVMALocked(ctx context.Context, opts memmap.MMapOp
if creds := auth.CredentialsFromContext(ctx); !creds.HasCapabilityIn(linux.CAP_IPC_LOCK, creds.UserNamespace.Root()) {
mlockLimit := limits.FromContext(ctx).Get(limits.MemoryLocked).Cur
if mlockLimit == 0 {
- return vmaIterator{}, usermem.AddrRange{}, syserror.EPERM
+ return vmaIterator{}, hostarch.AddrRange{}, syserror.EPERM
}
newLockedAS := mm.lockedAS + opts.Length
if opts.Unmap {
newLockedAS -= mm.mlockedBytesRangeLocked(ar)
}
if newLockedAS > mlockLimit {
- return vmaIterator{}, usermem.AddrRange{}, syserror.EAGAIN
+ return vmaIterator{}, hostarch.AddrRange{}, syserror.EAGAIN
}
}
}
@@ -93,7 +93,7 @@ func (mm *MemoryManager) createVMALocked(ctx context.Context, opts memmap.MMapOp
// The expression for writable is vma.canWriteMappableLocked(), but we
// don't yet have a vma.
if err := opts.Mappable.AddMapping(ctx, mm, ar, opts.Offset, !opts.Private && opts.MaxPerms.Write); err != nil {
- return vmaIterator{}, usermem.AddrRange{}, err
+ return vmaIterator{}, hostarch.AddrRange{}, err
}
}
@@ -137,7 +137,7 @@ type findAvailableOpts struct {
//
// - Unmap allows existing guard pages in the returned range.
- Addr usermem.Addr
+ Addr hostarch.Addr
Fixed bool
Unmap bool
Map32Bit bool
@@ -153,13 +153,13 @@ const (
// findAvailableLocked finds an allocatable range.
//
// Preconditions: mm.mappingMu must be locked.
-func (mm *MemoryManager) findAvailableLocked(length uint64, opts findAvailableOpts) (usermem.Addr, error) {
+func (mm *MemoryManager) findAvailableLocked(length uint64, opts findAvailableOpts) (hostarch.Addr, error) {
if opts.Fixed {
opts.Map32Bit = false
}
allowedAR := mm.applicationAddrRange()
if opts.Map32Bit {
- allowedAR = allowedAR.Intersect(usermem.AddrRange{map32Start, map32End})
+ allowedAR = allowedAR.Intersect(hostarch.AddrRange{map32Start, map32End})
}
// Does the provided suggestion work?
@@ -181,33 +181,33 @@ func (mm *MemoryManager) findAvailableLocked(length uint64, opts findAvailableOp
}
// Prefer hugepage alignment if a hugepage or more is requested.
- alignment := uint64(usermem.PageSize)
- if length >= usermem.HugePageSize {
- alignment = usermem.HugePageSize
+ alignment := uint64(hostarch.PageSize)
+ if length >= hostarch.HugePageSize {
+ alignment = hostarch.HugePageSize
}
if opts.Map32Bit {
return mm.findLowestAvailableLocked(length, alignment, allowedAR)
}
if mm.layout.DefaultDirection == arch.MmapBottomUp {
- return mm.findLowestAvailableLocked(length, alignment, usermem.AddrRange{mm.layout.BottomUpBase, mm.layout.MaxAddr})
+ return mm.findLowestAvailableLocked(length, alignment, hostarch.AddrRange{mm.layout.BottomUpBase, mm.layout.MaxAddr})
}
- return mm.findHighestAvailableLocked(length, alignment, usermem.AddrRange{mm.layout.MinAddr, mm.layout.TopDownBase})
+ return mm.findHighestAvailableLocked(length, alignment, hostarch.AddrRange{mm.layout.MinAddr, mm.layout.TopDownBase})
}
-func (mm *MemoryManager) applicationAddrRange() usermem.AddrRange {
- return usermem.AddrRange{mm.layout.MinAddr, mm.layout.MaxAddr}
+func (mm *MemoryManager) applicationAddrRange() hostarch.AddrRange {
+ return hostarch.AddrRange{mm.layout.MinAddr, mm.layout.MaxAddr}
}
// Preconditions: mm.mappingMu must be locked.
-func (mm *MemoryManager) findLowestAvailableLocked(length, alignment uint64, bounds usermem.AddrRange) (usermem.Addr, error) {
- for gap := mm.vmas.LowerBoundGap(bounds.Start); gap.Ok() && gap.Start() < bounds.End; gap = gap.NextLargeEnoughGap(usermem.Addr(length)) {
+func (mm *MemoryManager) findLowestAvailableLocked(length, alignment uint64, bounds hostarch.AddrRange) (hostarch.Addr, error) {
+ for gap := mm.vmas.LowerBoundGap(bounds.Start); gap.Ok() && gap.Start() < bounds.End; gap = gap.NextLargeEnoughGap(hostarch.Addr(length)) {
if gr := gap.availableRange().Intersect(bounds); uint64(gr.Length()) >= length {
// Can we shift up to match the alignment?
if offset := uint64(gr.Start) % alignment; offset != 0 {
if uint64(gr.Length()) >= length+alignment-offset {
// Yes, we're aligned.
- return gr.Start + usermem.Addr(alignment-offset), nil
+ return gr.Start + hostarch.Addr(alignment-offset), nil
}
}
@@ -219,15 +219,15 @@ func (mm *MemoryManager) findLowestAvailableLocked(length, alignment uint64, bou
}
// Preconditions: mm.mappingMu must be locked.
-func (mm *MemoryManager) findHighestAvailableLocked(length, alignment uint64, bounds usermem.AddrRange) (usermem.Addr, error) {
- for gap := mm.vmas.UpperBoundGap(bounds.End); gap.Ok() && gap.End() > bounds.Start; gap = gap.PrevLargeEnoughGap(usermem.Addr(length)) {
+func (mm *MemoryManager) findHighestAvailableLocked(length, alignment uint64, bounds hostarch.AddrRange) (hostarch.Addr, error) {
+ for gap := mm.vmas.UpperBoundGap(bounds.End); gap.Ok() && gap.End() > bounds.Start; gap = gap.PrevLargeEnoughGap(hostarch.Addr(length)) {
if gr := gap.availableRange().Intersect(bounds); uint64(gr.Length()) >= length {
// Can we shift down to match the alignment?
- start := gr.End - usermem.Addr(length)
+ start := gr.End - hostarch.Addr(length)
if offset := uint64(start) % alignment; offset != 0 {
- if gr.Start <= start-usermem.Addr(offset) {
+ if gr.Start <= start-hostarch.Addr(offset) {
// Yes, we're aligned.
- return start - usermem.Addr(offset), nil
+ return start - hostarch.Addr(offset), nil
}
}
@@ -239,7 +239,7 @@ func (mm *MemoryManager) findHighestAvailableLocked(length, alignment uint64, bo
}
// Preconditions: mm.mappingMu must be locked.
-func (mm *MemoryManager) mlockedBytesRangeLocked(ar usermem.AddrRange) uint64 {
+func (mm *MemoryManager) mlockedBytesRangeLocked(ar hostarch.AddrRange) uint64 {
var total uint64
for vseg := mm.vmas.LowerBoundSegment(ar.Start); vseg.Ok() && vseg.Start() < ar.End; vseg = vseg.NextSegment() {
if vseg.ValuePtr().mlockMode != memmap.MLockNone {
@@ -264,7 +264,7 @@ func (mm *MemoryManager) mlockedBytesRangeLocked(ar usermem.AddrRange) uint64 {
// Preconditions:
// * mm.mappingMu must be locked for reading; it may be temporarily unlocked.
// * ar.Length() != 0.
-func (mm *MemoryManager) getVMAsLocked(ctx context.Context, ar usermem.AddrRange, at usermem.AccessType, ignorePermissions bool) (vmaIterator, vmaGapIterator, error) {
+func (mm *MemoryManager) getVMAsLocked(ctx context.Context, ar hostarch.AddrRange, at hostarch.AccessType, ignorePermissions bool) (vmaIterator, vmaGapIterator, error) {
if checkInvariants {
if !ar.WellFormed() || ar.Length() == 0 {
panic(fmt.Sprintf("invalid ar: %v", ar))
@@ -320,7 +320,7 @@ func (mm *MemoryManager) getVMAsLocked(ctx context.Context, ar usermem.AddrRange
// temporarily unlocked.
//
// Postconditions: ars is not mutated.
-func (mm *MemoryManager) getVecVMAsLocked(ctx context.Context, ars usermem.AddrRangeSeq, at usermem.AccessType, ignorePermissions bool) (usermem.AddrRangeSeq, error) {
+func (mm *MemoryManager) getVecVMAsLocked(ctx context.Context, ars hostarch.AddrRangeSeq, at hostarch.AccessType, ignorePermissions bool) (hostarch.AddrRangeSeq, error) {
for arsit := ars; !arsit.IsEmpty(); arsit = arsit.Tail() {
ar := arsit.Head()
if ar.Length() == 0 {
@@ -339,7 +339,7 @@ func (mm *MemoryManager) getVecVMAsLocked(ctx context.Context, ars usermem.AddrR
//
// guardBytes is equivalent to Linux's stack_guard_gap after upstream
// 1be7107fbe18 "mm: larger stack guard gap, between vmas".
-const guardBytes = 256 * usermem.PageSize
+const guardBytes = 256 * hostarch.PageSize
// unmapLocked unmaps all addresses in ar and returns the resulting gap in
// mm.vmas.
@@ -348,7 +348,7 @@ const guardBytes = 256 * usermem.PageSize
// * mm.mappingMu must be locked for writing.
// * ar.Length() != 0.
// * ar must be page-aligned.
-func (mm *MemoryManager) unmapLocked(ctx context.Context, ar usermem.AddrRange) vmaGapIterator {
+func (mm *MemoryManager) unmapLocked(ctx context.Context, ar hostarch.AddrRange) vmaGapIterator {
if checkInvariants {
if !ar.WellFormed() || ar.Length() == 0 || !ar.IsPageAligned() {
panic(fmt.Sprintf("invalid ar: %v", ar))
@@ -369,7 +369,7 @@ func (mm *MemoryManager) unmapLocked(ctx context.Context, ar usermem.AddrRange)
// * mm.mappingMu must be locked for writing.
// * ar.Length() != 0.
// * ar must be page-aligned.
-func (mm *MemoryManager) removeVMAsLocked(ctx context.Context, ar usermem.AddrRange) vmaGapIterator {
+func (mm *MemoryManager) removeVMAsLocked(ctx context.Context, ar hostarch.AddrRange) vmaGapIterator {
if checkInvariants {
if !ar.WellFormed() || ar.Length() == 0 || !ar.IsPageAligned() {
panic(fmt.Sprintf("invalid ar: %v", ar))
@@ -426,12 +426,12 @@ func (vma *vma) isPrivateDataLocked() bool {
// vmaSetFunctions implements segment.Functions for vmaSet.
type vmaSetFunctions struct{}
-func (vmaSetFunctions) MinKey() usermem.Addr {
+func (vmaSetFunctions) MinKey() hostarch.Addr {
return 0
}
-func (vmaSetFunctions) MaxKey() usermem.Addr {
- return ^usermem.Addr(0)
+func (vmaSetFunctions) MaxKey() hostarch.Addr {
+ return ^hostarch.Addr(0)
}
func (vmaSetFunctions) ClearValue(vma *vma) {
@@ -440,7 +440,7 @@ func (vmaSetFunctions) ClearValue(vma *vma) {
vma.hint = ""
}
-func (vmaSetFunctions) Merge(ar1 usermem.AddrRange, vma1 vma, ar2 usermem.AddrRange, vma2 vma) (vma, bool) {
+func (vmaSetFunctions) Merge(ar1 hostarch.AddrRange, vma1 vma, ar2 hostarch.AddrRange, vma2 vma) (vma, bool) {
if vma1.mappable != vma2.mappable ||
(vma1.mappable != nil && vma1.off+uint64(ar1.Length()) != vma2.off) ||
vma1.realPerms != vma2.realPerms ||
@@ -462,7 +462,7 @@ func (vmaSetFunctions) Merge(ar1 usermem.AddrRange, vma1 vma, ar2 usermem.AddrRa
return vma1, true
}
-func (vmaSetFunctions) Split(ar usermem.AddrRange, v vma, split usermem.Addr) (vma, vma) {
+func (vmaSetFunctions) Split(ar hostarch.AddrRange, v vma, split hostarch.Addr) (vma, vma) {
v2 := v
if v2.mappable != nil {
v2.off += uint64(split - ar.Start)
@@ -476,7 +476,7 @@ func (vmaSetFunctions) Split(ar usermem.AddrRange, v vma, split usermem.Addr) (v
// Preconditions:
// * vseg.ValuePtr().mappable != nil.
// * vseg.Range().Contains(addr).
-func (vseg vmaIterator) mappableOffsetAt(addr usermem.Addr) uint64 {
+func (vseg vmaIterator) mappableOffsetAt(addr hostarch.Addr) uint64 {
if checkInvariants {
if !vseg.Ok() {
panic("terminal vma iterator")
@@ -503,7 +503,7 @@ func (vseg vmaIterator) mappableRange() memmap.MappableRange {
// * vseg.ValuePtr().mappable != nil.
// * vseg.Range().IsSupersetOf(ar).
// * ar.Length() != 0.
-func (vseg vmaIterator) mappableRangeOf(ar usermem.AddrRange) memmap.MappableRange {
+func (vseg vmaIterator) mappableRangeOf(ar hostarch.AddrRange) memmap.MappableRange {
if checkInvariants {
if !vseg.Ok() {
panic("terminal vma iterator")
@@ -528,7 +528,7 @@ func (vseg vmaIterator) mappableRangeOf(ar usermem.AddrRange) memmap.MappableRan
// * vseg.ValuePtr().mappable != nil.
// * vseg.mappableRange().IsSupersetOf(mr).
// * mr.Length() != 0.
-func (vseg vmaIterator) addrRangeOf(mr memmap.MappableRange) usermem.AddrRange {
+func (vseg vmaIterator) addrRangeOf(mr memmap.MappableRange) hostarch.AddrRange {
if checkInvariants {
if !vseg.Ok() {
panic("terminal vma iterator")
@@ -546,7 +546,7 @@ func (vseg vmaIterator) addrRangeOf(mr memmap.MappableRange) usermem.AddrRange {
vma := vseg.ValuePtr()
vstart := vseg.Start()
- return usermem.AddrRange{vstart + usermem.Addr(mr.Start-vma.off), vstart + usermem.Addr(mr.End-vma.off)}
+ return hostarch.AddrRange{vstart + hostarch.Addr(mr.Start-vma.off), vstart + hostarch.Addr(mr.End-vma.off)}
}
// seekNextLowerBound returns mm.vmas.LowerBoundSegment(addr), but does so by
@@ -555,7 +555,7 @@ func (vseg vmaIterator) addrRangeOf(mr memmap.MappableRange) usermem.AddrRange {
// Preconditions:
// * mm.mappingMu must be locked.
// * addr >= vseg.Start().
-func (vseg vmaIterator) seekNextLowerBound(addr usermem.Addr) vmaIterator {
+func (vseg vmaIterator) seekNextLowerBound(addr hostarch.Addr) vmaIterator {
if checkInvariants {
if !vseg.Ok() {
panic("terminal vma iterator")
@@ -572,7 +572,7 @@ func (vseg vmaIterator) seekNextLowerBound(addr usermem.Addr) vmaIterator {
// availableRange returns the subset of vgap.Range() in which new vmas may be
// created without MMapOpts.Unmap == true.
-func (vgap vmaGapIterator) availableRange() usermem.AddrRange {
+func (vgap vmaGapIterator) availableRange() hostarch.AddrRange {
ar := vgap.Range()
next := vgap.NextSegment()
if !next.Ok() || !next.ValuePtr().growsDown {
@@ -580,7 +580,7 @@ func (vgap vmaGapIterator) availableRange() usermem.AddrRange {
}
// Exclude guard pages.
if ar.Length() < guardBytes {
- return usermem.AddrRange{ar.Start, ar.Start}
+ return hostarch.AddrRange{ar.Start, ar.Start}
}
ar.End -= guardBytes
return ar
diff --git a/pkg/sentry/pgalloc/BUILD b/pkg/sentry/pgalloc/BUILD
index e5bf13c40..57d73d770 100644
--- a/pkg/sentry/pgalloc/BUILD
+++ b/pkg/sentry/pgalloc/BUILD
@@ -85,6 +85,7 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/hostarch",
"//pkg/log",
"//pkg/memutil",
"//pkg/safemem",
@@ -106,5 +107,5 @@ go_test(
size = "small",
srcs = ["pgalloc_test.go"],
library = ":pgalloc",
- deps = ["//pkg/usermem"],
+ deps = ["//pkg/hostarch"],
)
diff --git a/pkg/sentry/pgalloc/pgalloc.go b/pkg/sentry/pgalloc/pgalloc.go
index a4af3e21b..b81292c46 100644
--- a/pkg/sentry/pgalloc/pgalloc.go
+++ b/pkg/sentry/pgalloc/pgalloc.go
@@ -31,6 +31,7 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/hostmm"
@@ -38,7 +39,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/usage"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
// MemoryFile is a memmap.File whose pages may be allocated to arbitrary
@@ -283,7 +283,7 @@ const (
chunkMask = chunkSize - 1
// maxPage is the highest 64-bit page.
- maxPage = math.MaxUint64 &^ (usermem.PageSize - 1)
+ maxPage = math.MaxUint64 &^ (hostarch.PageSize - 1)
)
// NewMemoryFile creates a MemoryFile backed by the given file. If
@@ -344,7 +344,7 @@ func NewMemoryFile(file *os.File, opts MemoryFileOpts) (*MemoryFile, error) {
m, _, errno := unix.Syscall6(
unix.SYS_MMAP,
0,
- usermem.PageSize,
+ hostarch.PageSize,
unix.PROT_EXEC,
unix.MAP_SHARED,
file.Fd(),
@@ -357,7 +357,7 @@ func NewMemoryFile(file *os.File, opts MemoryFileOpts) (*MemoryFile, error) {
if _, _, errno := unix.Syscall(
unix.SYS_MUNMAP,
m,
- usermem.PageSize,
+ hostarch.PageSize,
0); errno != 0 {
panic(fmt.Sprintf("failed to unmap PROT_EXEC MemoryFile mapping: %v", errno))
}
@@ -386,7 +386,7 @@ func (f *MemoryFile) Destroy() {
//
// Preconditions: length must be page-aligned and non-zero.
func (f *MemoryFile) Allocate(length uint64, kind usage.MemoryKind) (memmap.FileRange, error) {
- if length == 0 || length%usermem.PageSize != 0 {
+ if length == 0 || length%hostarch.PageSize != 0 {
panic(fmt.Sprintf("invalid allocation length: %#x", length))
}
@@ -395,9 +395,9 @@ func (f *MemoryFile) Allocate(length uint64, kind usage.MemoryKind) (memmap.File
// Align hugepage-and-larger allocations on hugepage boundaries to try
// to take advantage of hugetmpfs.
- alignment := uint64(usermem.PageSize)
- if length >= usermem.HugePageSize {
- alignment = usermem.HugePageSize
+ alignment := uint64(hostarch.PageSize)
+ if length >= hostarch.HugePageSize {
+ alignment = hostarch.HugePageSize
}
// Find a range in the underlying file.
@@ -524,13 +524,13 @@ func (f *MemoryFile) AllocateAndFill(length uint64, kind usage.MemoryKind, r saf
if err != nil {
return memmap.FileRange{}, err
}
- dsts, err := f.MapInternal(fr, usermem.Write)
+ dsts, err := f.MapInternal(fr, hostarch.Write)
if err != nil {
f.DecRef(fr)
return memmap.FileRange{}, err
}
n, err := safemem.ReadFullToBlocks(r, dsts)
- un := uint64(usermem.Addr(n).RoundDown())
+ un := uint64(hostarch.Addr(n).RoundDown())
if un < length {
// Free unused memory and update fr to contain only the memory that is
// still allocated.
@@ -552,7 +552,7 @@ const (
//
// Preconditions: fr.Length() > 0.
func (f *MemoryFile) Decommit(fr memmap.FileRange) error {
- if !fr.WellFormed() || fr.Length() == 0 || fr.Start%usermem.PageSize != 0 || fr.End%usermem.PageSize != 0 {
+ if !fr.WellFormed() || fr.Length() == 0 || fr.Start%hostarch.PageSize != 0 || fr.End%hostarch.PageSize != 0 {
panic(fmt.Sprintf("invalid range: %v", fr))
}
@@ -614,7 +614,7 @@ func (f *MemoryFile) markDecommitted(fr memmap.FileRange) {
// IncRef implements memmap.File.IncRef.
func (f *MemoryFile) IncRef(fr memmap.FileRange) {
- if !fr.WellFormed() || fr.Length() == 0 || fr.Start%usermem.PageSize != 0 || fr.End%usermem.PageSize != 0 {
+ if !fr.WellFormed() || fr.Length() == 0 || fr.Start%hostarch.PageSize != 0 || fr.End%hostarch.PageSize != 0 {
panic(fmt.Sprintf("invalid range: %v", fr))
}
@@ -633,7 +633,7 @@ func (f *MemoryFile) IncRef(fr memmap.FileRange) {
// DecRef implements memmap.File.DecRef.
func (f *MemoryFile) DecRef(fr memmap.FileRange) {
- if !fr.WellFormed() || fr.Length() == 0 || fr.Start%usermem.PageSize != 0 || fr.End%usermem.PageSize != 0 {
+ if !fr.WellFormed() || fr.Length() == 0 || fr.Start%hostarch.PageSize != 0 || fr.End%hostarch.PageSize != 0 {
panic(fmt.Sprintf("invalid range: %v", fr))
}
@@ -669,7 +669,7 @@ func (f *MemoryFile) DecRef(fr memmap.FileRange) {
}
// MapInternal implements memmap.File.MapInternal.
-func (f *MemoryFile) MapInternal(fr memmap.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) {
+func (f *MemoryFile) MapInternal(fr memmap.FileRange, at hostarch.AccessType) (safemem.BlockSeq, error) {
if !fr.WellFormed() || fr.Length() == 0 {
panic(fmt.Sprintf("invalid range: %v", fr))
}
@@ -935,7 +935,7 @@ func (f *MemoryFile) updateUsageLocked(currentUsage uint64, checkCommitted func(
// Ensure that we have sufficient buffer for the call
// (one byte per page). The length of each slice must
// be page-aligned.
- bufLen := len(s) / usermem.PageSize
+ bufLen := len(s) / hostarch.PageSize
if len(buf) < bufLen {
buf = make([]byte, bufLen)
}
@@ -967,8 +967,8 @@ func (f *MemoryFile) updateUsageLocked(currentUsage uint64, checkCommitted func(
}
}
committedFR := memmap.FileRange{
- Start: r.Start + uint64(i*usermem.PageSize),
- End: r.Start + uint64(j*usermem.PageSize),
+ Start: r.Start + uint64(i*hostarch.PageSize),
+ End: r.Start + uint64(j*hostarch.PageSize),
}
// Advance seg to committedFR.Start.
for seg.Ok() && seg.End() < committedFR.Start {
diff --git a/pkg/sentry/pgalloc/pgalloc_test.go b/pkg/sentry/pgalloc/pgalloc_test.go
index 405db141f..8d2b7eb5e 100644
--- a/pkg/sentry/pgalloc/pgalloc_test.go
+++ b/pkg/sentry/pgalloc/pgalloc_test.go
@@ -17,12 +17,12 @@ package pgalloc
import (
"testing"
- "gvisor.dev/gvisor/pkg/usermem"
+ "gvisor.dev/gvisor/pkg/hostarch"
)
const (
- page = usermem.PageSize
- hugepage = usermem.HugePageSize
+ page = hostarch.PageSize
+ hugepage = hostarch.HugePageSize
topPage = (1 << 63) - page
)
diff --git a/pkg/sentry/pgalloc/save_restore.go b/pkg/sentry/pgalloc/save_restore.go
index e05c8d074..345cdde55 100644
--- a/pkg/sentry/pgalloc/save_restore.go
+++ b/pkg/sentry/pgalloc/save_restore.go
@@ -23,11 +23,11 @@ import (
"sync/atomic"
"golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/usage"
"gvisor.dev/gvisor/pkg/state"
"gvisor.dev/gvisor/pkg/state/wire"
- "gvisor.dev/gvisor/pkg/usermem"
)
// SaveTo writes f's state to the given stream.
@@ -49,11 +49,11 @@ func (f *MemoryFile) SaveTo(ctx context.Context, w wire.Writer) error {
// Ensure that all pages that contain data have knownCommitted set, since
// we only store knownCommitted pages below.
- zeroPage := make([]byte, usermem.PageSize)
+ zeroPage := make([]byte, hostarch.PageSize)
err := f.updateUsageLocked(0, func(bs []byte, committed []byte) error {
- for pgoff := 0; pgoff < len(bs); pgoff += usermem.PageSize {
- i := pgoff / usermem.PageSize
- pg := bs[pgoff : pgoff+usermem.PageSize]
+ for pgoff := 0; pgoff < len(bs); pgoff += hostarch.PageSize {
+ i := pgoff / hostarch.PageSize
+ pg := bs[pgoff : pgoff+hostarch.PageSize]
if !bytes.Equal(pg, zeroPage) {
committed[i] = 1
continue
diff --git a/pkg/sentry/platform/BUILD b/pkg/sentry/platform/BUILD
index db7d55ef2..7125657b3 100644
--- a/pkg/sentry/platform/BUILD
+++ b/pkg/sentry/platform/BUILD
@@ -13,6 +13,7 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/hostarch",
"//pkg/seccomp",
"//pkg/sentry/arch",
"//pkg/sentry/hostmm",
diff --git a/pkg/sentry/platform/kvm/BUILD b/pkg/sentry/platform/kvm/BUILD
index 03a76eb9b..f04898dc1 100644
--- a/pkg/sentry/platform/kvm/BUILD
+++ b/pkg/sentry/platform/kvm/BUILD
@@ -43,6 +43,7 @@ go_library(
"//pkg/atomicbitops",
"//pkg/context",
"//pkg/cpuid",
+ "//pkg/hostarch",
"//pkg/log",
"//pkg/procid",
"//pkg/ring0",
@@ -56,7 +57,6 @@ go_library(
"//pkg/sentry/platform/interrupt",
"//pkg/sentry/time",
"//pkg/sync",
- "//pkg/usermem",
"@org_golang_x_sys//unix:go_default_library",
],
)
@@ -76,6 +76,7 @@ go_test(
"requires-kvm",
],
deps = [
+ "//pkg/hostarch",
"//pkg/ring0",
"//pkg/ring0/pagetables",
"//pkg/sentry/arch",
@@ -83,7 +84,6 @@ go_test(
"//pkg/sentry/platform",
"//pkg/sentry/platform/kvm/testutil",
"//pkg/sentry/time",
- "//pkg/usermem",
"@org_golang_x_sys//unix:go_default_library",
],
)
diff --git a/pkg/sentry/platform/kvm/address_space.go b/pkg/sentry/platform/kvm/address_space.go
index 25c21e843..5524e8727 100644
--- a/pkg/sentry/platform/kvm/address_space.go
+++ b/pkg/sentry/platform/kvm/address_space.go
@@ -18,11 +18,11 @@ import (
"sync/atomic"
"gvisor.dev/gvisor/pkg/atomicbitops"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/ring0/pagetables"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/platform"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/usermem"
)
// dirtySet tracks vCPUs for invalidation.
@@ -118,7 +118,7 @@ type hostMapEntry struct {
// +checkescape:hard,stack
//
//go:nosplit
-func (as *addressSpace) mapLocked(addr usermem.Addr, m hostMapEntry, at usermem.AccessType) (inv bool) {
+func (as *addressSpace) mapLocked(addr hostarch.Addr, m hostMapEntry, at hostarch.AccessType) (inv bool) {
for m.length > 0 {
physical, length, ok := translateToPhysical(m.addr)
if !ok {
@@ -144,14 +144,14 @@ func (as *addressSpace) mapLocked(addr usermem.Addr, m hostMapEntry, at usermem.
}, physical) || inv
m.addr += length
m.length -= length
- addr += usermem.Addr(length)
+ addr += hostarch.Addr(length)
}
return inv
}
// MapFile implements platform.AddressSpace.MapFile.
-func (as *addressSpace) MapFile(addr usermem.Addr, f memmap.File, fr memmap.FileRange, at usermem.AccessType, precommit bool) error {
+func (as *addressSpace) MapFile(addr hostarch.Addr, f memmap.File, fr memmap.FileRange, at hostarch.AccessType, precommit bool) error {
as.mu.Lock()
defer as.mu.Unlock()
@@ -165,7 +165,7 @@ func (as *addressSpace) MapFile(addr usermem.Addr, f memmap.File, fr memmap.File
// We don't execute from application file-mapped memory, and guest page
// tables don't care if we have execute permission (but they do need pages
// to be readable).
- bs, err := f.MapInternal(fr, usermem.AccessType{
+ bs, err := f.MapInternal(fr, hostarch.AccessType{
Read: at.Read || at.Execute || precommit,
Write: at.Write,
})
@@ -187,7 +187,7 @@ func (as *addressSpace) MapFile(addr usermem.Addr, f memmap.File, fr memmap.File
// lookup in our host page tables for this translation.
if precommit {
s := b.ToSlice()
- for i := 0; i < len(s); i += usermem.PageSize {
+ for i := 0; i < len(s); i += hostarch.PageSize {
_ = s[i] // Touch to commit.
}
}
@@ -201,7 +201,7 @@ func (as *addressSpace) MapFile(addr usermem.Addr, f memmap.File, fr memmap.File
length: uintptr(b.Len()),
}, at)
inv = inv || prev
- addr += usermem.Addr(b.Len())
+ addr += hostarch.Addr(b.Len())
}
if inv {
as.invalidate()
@@ -215,12 +215,12 @@ func (as *addressSpace) MapFile(addr usermem.Addr, f memmap.File, fr memmap.File
// +checkescape:hard,stack
//
//go:nosplit
-func (as *addressSpace) unmapLocked(addr usermem.Addr, length uint64) bool {
+func (as *addressSpace) unmapLocked(addr hostarch.Addr, length uint64) bool {
return as.pageTables.Unmap(addr, uintptr(length))
}
// Unmap unmaps the given range by calling pagetables.PageTables.Unmap.
-func (as *addressSpace) Unmap(addr usermem.Addr, length uint64) {
+func (as *addressSpace) Unmap(addr hostarch.Addr, length uint64) {
as.mu.Lock()
defer as.mu.Unlock()
diff --git a/pkg/sentry/platform/kvm/bluepill_fault.go b/pkg/sentry/platform/kvm/bluepill_fault.go
index 37c53fa02..28a613a54 100644
--- a/pkg/sentry/platform/kvm/bluepill_fault.go
+++ b/pkg/sentry/platform/kvm/bluepill_fault.go
@@ -18,7 +18,7 @@ import (
"sync/atomic"
"golang.org/x/sys/unix"
- "gvisor.dev/gvisor/pkg/usermem"
+ "gvisor.dev/gvisor/pkg/hostarch"
)
const (
@@ -47,7 +47,7 @@ func yield() {
//
//go:nosplit
func calculateBluepillFault(physical uintptr, phyRegions []physicalRegion) (virtualStart, physicalStart, length uintptr, ok bool) {
- alignedPhysical := physical &^ uintptr(usermem.PageSize-1)
+ alignedPhysical := physical &^ uintptr(hostarch.PageSize-1)
for _, pr := range phyRegions {
end := pr.physical + pr.length
if physical < pr.physical || physical >= end {
diff --git a/pkg/sentry/platform/kvm/context.go b/pkg/sentry/platform/kvm/context.go
index 706fa53dc..f4d4473a8 100644
--- a/pkg/sentry/platform/kvm/context.go
+++ b/pkg/sentry/platform/kvm/context.go
@@ -18,11 +18,11 @@ import (
"sync/atomic"
pkgcontext "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/ring0"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/platform"
"gvisor.dev/gvisor/pkg/sentry/platform/interrupt"
- "gvisor.dev/gvisor/pkg/usermem"
)
// context is an implementation of the platform context.
@@ -40,7 +40,7 @@ type context struct {
}
// Switch runs the provided context in the given address space.
-func (c *context) Switch(ctx pkgcontext.Context, mm platform.MemoryManager, ac arch.Context, _ int32) (*arch.SignalInfo, usermem.AccessType, error) {
+func (c *context) Switch(ctx pkgcontext.Context, mm platform.MemoryManager, ac arch.Context, _ int32) (*arch.SignalInfo, hostarch.AccessType, error) {
as := mm.AddressSpace()
localAS := as.(*addressSpace)
@@ -50,7 +50,7 @@ func (c *context) Switch(ctx pkgcontext.Context, mm platform.MemoryManager, ac a
// Enable interrupts (i.e. calls to vCPU.Notify).
if !c.interrupt.Enable(cpu) {
c.machine.Put(cpu) // Already preempted.
- return nil, usermem.NoAccess, platform.ErrContextInterrupt
+ return nil, hostarch.NoAccess, platform.ErrContextInterrupt
}
// Set the active address space.
diff --git a/pkg/sentry/platform/kvm/kvm.go b/pkg/sentry/platform/kvm/kvm.go
index 92c05a9ad..aac0fdffe 100644
--- a/pkg/sentry/platform/kvm/kvm.go
+++ b/pkg/sentry/platform/kvm/kvm.go
@@ -20,11 +20,11 @@ import (
"os"
"golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/ring0"
"gvisor.dev/gvisor/pkg/ring0/pagetables"
"gvisor.dev/gvisor/pkg/sentry/platform"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/usermem"
)
// userMemoryRegion is a region of physical memory.
@@ -146,13 +146,13 @@ func (*KVM) MapUnit() uint64 {
}
// MinUserAddress returns the lowest available address.
-func (*KVM) MinUserAddress() usermem.Addr {
- return usermem.PageSize
+func (*KVM) MinUserAddress() hostarch.Addr {
+ return hostarch.PageSize
}
// MaxUserAddress returns the first address that may not be used.
-func (*KVM) MaxUserAddress() usermem.Addr {
- return usermem.Addr(ring0.MaximumUserAddress)
+func (*KVM) MaxUserAddress() hostarch.Addr {
+ return hostarch.Addr(ring0.MaximumUserAddress)
}
// NewAddressSpace returns a new pagetable root.
diff --git a/pkg/sentry/platform/kvm/kvm_test.go b/pkg/sentry/platform/kvm/kvm_test.go
index 5bce16dde..ceff09a60 100644
--- a/pkg/sentry/platform/kvm/kvm_test.go
+++ b/pkg/sentry/platform/kvm/kvm_test.go
@@ -22,6 +22,7 @@ import (
"time"
"golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/ring0"
"gvisor.dev/gvisor/pkg/ring0/pagetables"
"gvisor.dev/gvisor/pkg/sentry/arch"
@@ -29,7 +30,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/platform"
"gvisor.dev/gvisor/pkg/sentry/platform/kvm/testutil"
ktime "gvisor.dev/gvisor/pkg/sentry/time"
- "gvisor.dev/gvisor/pkg/usermem"
)
var dummyFPState = fpu.NewState()
@@ -142,8 +142,8 @@ func applicationTest(t testHarness, useHostMappings bool, target func(), fn func
// done for regular user code, but is fine for test
// purposes.)
applyPhysicalRegions(func(pr physicalRegion) bool {
- pt.Map(usermem.Addr(pr.virtual), pr.length, pagetables.MapOpts{
- AccessType: usermem.AnyAccess,
+ pt.Map(hostarch.Addr(pr.virtual), pr.length, pagetables.MapOpts{
+ AccessType: hostarch.AnyAccess,
User: true,
}, pr.physical)
return true // Keep iterating.
@@ -351,7 +351,7 @@ func TestInvalidate(t *testing.T) {
break // Done.
}
// Unmap the page containing data & invalidate.
- pt.Unmap(usermem.Addr(reflect.ValueOf(&data).Pointer() & ^uintptr(usermem.PageSize-1)), usermem.PageSize)
+ pt.Unmap(hostarch.Addr(reflect.ValueOf(&data).Pointer() & ^uintptr(hostarch.PageSize-1)), hostarch.PageSize)
for {
var si arch.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
diff --git a/pkg/sentry/platform/kvm/machine.go b/pkg/sentry/platform/kvm/machine.go
index 0e4cf01e1..b3d4188a3 100644
--- a/pkg/sentry/platform/kvm/machine.go
+++ b/pkg/sentry/platform/kvm/machine.go
@@ -21,13 +21,13 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/atomicbitops"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/procid"
"gvisor.dev/gvisor/pkg/ring0"
"gvisor.dev/gvisor/pkg/ring0/pagetables"
ktime "gvisor.dev/gvisor/pkg/sentry/time"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/usermem"
)
// machine contains state associated with the VM as a whole.
@@ -227,9 +227,9 @@ func newMachine(vm int) (*machine, error) {
applyPhysicalRegions(func(pr physicalRegion) bool {
// Map everything in the lower half.
m.kernel.PageTables.Map(
- usermem.Addr(pr.virtual),
+ hostarch.Addr(pr.virtual),
pr.length,
- pagetables.MapOpts{AccessType: usermem.AnyAccess},
+ pagetables.MapOpts{AccessType: hostarch.AnyAccess},
pr.physical)
return true // Keep iterating.
@@ -436,7 +436,7 @@ func (m *machine) Get() *vCPU {
}
// The vCPU is not be able to transition to
- // vCPUGuest|vCPUUser or to vCPUUser because that
+ // vCPUGuest|vCPUWaiter or to vCPUUser because that
// transition requires holding the machine mutex, as we
// do now. There is no path to register a waiter on
// just the vCPUReady state.
diff --git a/pkg/sentry/platform/kvm/machine_amd64.go b/pkg/sentry/platform/kvm/machine_amd64.go
index 3af96c7e5..e8e209249 100644
--- a/pkg/sentry/platform/kvm/machine_amd64.go
+++ b/pkg/sentry/platform/kvm/machine_amd64.go
@@ -24,13 +24,13 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/cpuid"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/ring0"
"gvisor.dev/gvisor/pkg/ring0/pagetables"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/arch/fpu"
"gvisor.dev/gvisor/pkg/sentry/platform"
ktime "gvisor.dev/gvisor/pkg/sentry/time"
- "gvisor.dev/gvisor/pkg/usermem"
)
// initArchState initializes architecture-specific state.
@@ -41,7 +41,7 @@ func (m *machine) initArchState() error {
unix.SYS_IOCTL,
uintptr(m.fd),
_KVM_SET_TSS_ADDR,
- uintptr(reservedMemory-(3*usermem.PageSize))); errno != 0 {
+ uintptr(reservedMemory-(3*hostarch.PageSize))); errno != 0 {
return errno
}
@@ -256,19 +256,19 @@ func (c *vCPU) setSystemTime() error {
// nonCanonical generates a canonical address return.
//
//go:nosplit
-func nonCanonical(addr uint64, signal int32, info *arch.SignalInfo) (usermem.AccessType, error) {
+func nonCanonical(addr uint64, signal int32, info *arch.SignalInfo) (hostarch.AccessType, error) {
*info = arch.SignalInfo{
Signo: signal,
Code: arch.SignalInfoKernel,
}
info.SetAddr(addr) // Include address.
- return usermem.NoAccess, platform.ErrContextSignal
+ return hostarch.NoAccess, platform.ErrContextSignal
}
// fault generates an appropriate fault return.
//
//go:nosplit
-func (c *vCPU) fault(signal int32, info *arch.SignalInfo) (usermem.AccessType, error) {
+func (c *vCPU) fault(signal int32, info *arch.SignalInfo) (hostarch.AccessType, error) {
bluepill(c) // Probably no-op, but may not be.
faultAddr := ring0.ReadCR2()
code, user := c.ErrorCode()
@@ -276,12 +276,12 @@ func (c *vCPU) fault(signal int32, info *arch.SignalInfo) (usermem.AccessType, e
// The last fault serviced by this CPU was not a user
// fault, so we can't reliably trust the faultAddr or
// the code provided here. We need to re-execute.
- return usermem.NoAccess, platform.ErrContextInterrupt
+ return hostarch.NoAccess, platform.ErrContextInterrupt
}
// Reset the pointed SignalInfo.
*info = arch.SignalInfo{Signo: signal}
info.SetAddr(uint64(faultAddr))
- accessType := usermem.AccessType{
+ accessType := hostarch.AccessType{
Read: code&(1<<1) == 0,
Write: code&(1<<1) != 0,
Execute: code&(1<<4) != 0,
@@ -310,14 +310,14 @@ func loadByte(ptr *byte) byte {
//go:nosplit
func prefaultFloatingPointState(data *fpu.State) {
size := len(*data)
- for i := 0; i < size; i += usermem.PageSize {
+ for i := 0; i < size; i += hostarch.PageSize {
loadByte(&(*data)[i])
}
loadByte(&(*data)[size-1])
}
// SwitchToUser unpacks architectural-details.
-func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo) (usermem.AccessType, error) {
+func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo) (hostarch.AccessType, error) {
// Check for canonical addresses.
if regs := switchOpts.Registers; !ring0.IsCanonical(regs.Rip) {
return nonCanonical(regs.Rip, int32(unix.SIGSEGV), info)
@@ -353,7 +353,7 @@ func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo)
switch vector {
case ring0.Syscall, ring0.SyscallInt80:
// Fast path: system call executed.
- return usermem.NoAccess, nil
+ return hostarch.NoAccess, nil
case ring0.PageFault:
return c.fault(int32(unix.SIGSEGV), info)
@@ -364,7 +364,7 @@ func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo)
Code: 1, // TRAP_BRKPT (breakpoint).
}
info.SetAddr(switchOpts.Registers.Rip) // Include address.
- return usermem.AccessType{}, platform.ErrContextSignal
+ return hostarch.AccessType{}, platform.ErrContextSignal
case ring0.GeneralProtectionFault,
ring0.SegmentNotPresent,
@@ -380,9 +380,9 @@ func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo)
// When CPUID faulting is enabled, we will generate a #GP(0) when
// userspace executes a CPUID instruction. This is handled above,
// because we need to be able to map and read user memory.
- return usermem.AccessType{}, platform.ErrContextSignalCPUID
+ return hostarch.AccessType{}, platform.ErrContextSignalCPUID
}
- return usermem.AccessType{}, platform.ErrContextSignal
+ return hostarch.AccessType{}, platform.ErrContextSignal
case ring0.InvalidOpcode:
*info = arch.SignalInfo{
@@ -390,7 +390,7 @@ func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo)
Code: 1, // ILL_ILLOPC (illegal opcode).
}
info.SetAddr(switchOpts.Registers.Rip) // Include address.
- return usermem.AccessType{}, platform.ErrContextSignal
+ return hostarch.AccessType{}, platform.ErrContextSignal
case ring0.DivideByZero:
*info = arch.SignalInfo{
@@ -398,7 +398,7 @@ func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo)
Code: 1, // FPE_INTDIV (divide by zero).
}
info.SetAddr(switchOpts.Registers.Rip) // Include address.
- return usermem.AccessType{}, platform.ErrContextSignal
+ return hostarch.AccessType{}, platform.ErrContextSignal
case ring0.Overflow:
*info = arch.SignalInfo{
@@ -406,7 +406,7 @@ func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo)
Code: 2, // FPE_INTOVF (integer overflow).
}
info.SetAddr(switchOpts.Registers.Rip) // Include address.
- return usermem.AccessType{}, platform.ErrContextSignal
+ return hostarch.AccessType{}, platform.ErrContextSignal
case ring0.X87FloatingPointException,
ring0.SIMDFloatingPointException:
@@ -415,17 +415,17 @@ func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo)
Code: 7, // FPE_FLTINV (invalid operation).
}
info.SetAddr(switchOpts.Registers.Rip) // Include address.
- return usermem.AccessType{}, platform.ErrContextSignal
+ return hostarch.AccessType{}, platform.ErrContextSignal
case ring0.Vector(bounce): // ring0.VirtualizationException
- return usermem.NoAccess, platform.ErrContextInterrupt
+ return hostarch.NoAccess, platform.ErrContextInterrupt
case ring0.AlignmentCheck:
*info = arch.SignalInfo{
Signo: int32(unix.SIGBUS),
Code: 2, // BUS_ADRERR (physical address does not exist).
}
- return usermem.NoAccess, platform.ErrContextSignal
+ return hostarch.NoAccess, platform.ErrContextSignal
case ring0.NMI:
// An NMI is generated only when a fault is not servicable by
@@ -471,9 +471,9 @@ func (m *machine) mapUpperHalf(pageTable *pagetables.PageTables) {
panic("impossible translation")
}
pageTable.Map(
- usermem.Addr(ring0.KernelStartAddress|r.virtual),
+ hostarch.Addr(ring0.KernelStartAddress|r.virtual),
r.length,
- pagetables.MapOpts{AccessType: usermem.Execute},
+ pagetables.MapOpts{AccessType: hostarch.Execute},
physical)
}
})
@@ -484,9 +484,9 @@ func (m *machine) mapUpperHalf(pageTable *pagetables.PageTables) {
panic("impossible translation")
}
pageTable.Map(
- usermem.Addr(ring0.KernelStartAddress|start),
+ hostarch.Addr(ring0.KernelStartAddress|start),
regionLen,
- pagetables.MapOpts{AccessType: usermem.ReadWrite},
+ pagetables.MapOpts{AccessType: hostarch.ReadWrite},
physical)
}
}
diff --git a/pkg/sentry/platform/kvm/machine_arm64.go b/pkg/sentry/platform/kvm/machine_arm64.go
index 2055e61a7..cd912f922 100644
--- a/pkg/sentry/platform/kvm/machine_arm64.go
+++ b/pkg/sentry/platform/kvm/machine_arm64.go
@@ -17,12 +17,12 @@
package kvm
import (
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/ring0"
"gvisor.dev/gvisor/pkg/ring0/pagetables"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/arch/fpu"
"gvisor.dev/gvisor/pkg/sentry/platform"
- "gvisor.dev/gvisor/pkg/usermem"
)
type vCPUArchState struct {
@@ -53,9 +53,9 @@ const (
func (m *machine) mapUpperHalf(pageTable *pagetables.PageTables) {
applyPhysicalRegions(func(pr physicalRegion) bool {
pageTable.Map(
- usermem.Addr(ring0.KernelStartAddress|pr.virtual),
+ hostarch.Addr(ring0.KernelStartAddress|pr.virtual),
pr.length,
- pagetables.MapOpts{AccessType: usermem.AnyAccess, Global: true},
+ pagetables.MapOpts{AccessType: hostarch.AnyAccess, Global: true},
pr.physical)
return true // Keep iterating.
@@ -117,13 +117,13 @@ func availableRegionsForSetMem() (phyRegions []physicalRegion) {
// nonCanonical generates a canonical address return.
//
//go:nosplit
-func nonCanonical(addr uint64, signal int32, info *arch.SignalInfo) (usermem.AccessType, error) {
+func nonCanonical(addr uint64, signal int32, info *arch.SignalInfo) (hostarch.AccessType, error) {
*info = arch.SignalInfo{
Signo: signal,
Code: arch.SignalInfoKernel,
}
info.SetAddr(addr) // Include address.
- return usermem.NoAccess, platform.ErrContextSignal
+ return hostarch.NoAccess, platform.ErrContextSignal
}
// isInstructionAbort returns true if it is an instruction abort.
@@ -148,7 +148,7 @@ func isWriteFault(code uint64) bool {
// fault generates an appropriate fault return.
//
//go:nosplit
-func (c *vCPU) fault(signal int32, info *arch.SignalInfo) (usermem.AccessType, error) {
+func (c *vCPU) fault(signal int32, info *arch.SignalInfo) (hostarch.AccessType, error) {
bluepill(c) // Probably no-op, but may not be.
faultAddr := c.GetFaultAddr()
code, user := c.ErrorCode()
@@ -157,7 +157,7 @@ func (c *vCPU) fault(signal int32, info *arch.SignalInfo) (usermem.AccessType, e
// The last fault serviced by this CPU was not a user
// fault, so we can't reliably trust the faultAddr or
// the code provided here. We need to re-execute.
- return usermem.NoAccess, platform.ErrContextInterrupt
+ return hostarch.NoAccess, platform.ErrContextInterrupt
}
// Reset the pointed SignalInfo.
@@ -174,7 +174,7 @@ func (c *vCPU) fault(signal int32, info *arch.SignalInfo) (usermem.AccessType, e
info.Code = 2
}
- accessType := usermem.AccessType{
+ accessType := hostarch.AccessType{
Read: !isWriteFault(uint64(code)),
Write: isWriteFault(uint64(code)),
Execute: isInstructionAbort(uint64(code)),
diff --git a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go
index e7d5f3193..634e55ec0 100644
--- a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go
+++ b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go
@@ -23,12 +23,12 @@ import (
"unsafe"
"golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/ring0"
"gvisor.dev/gvisor/pkg/ring0/pagetables"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/arch/fpu"
"gvisor.dev/gvisor/pkg/sentry/platform"
- "gvisor.dev/gvisor/pkg/usermem"
)
type kvmVcpuInit struct {
@@ -209,7 +209,7 @@ func (c *vCPU) getOneRegister(reg *kvmOneReg) error {
}
// SwitchToUser unpacks architectural-details.
-func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo) (usermem.AccessType, error) {
+func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo) (hostarch.AccessType, error) {
// Check for canonical addresses.
if regs := switchOpts.Registers; !ring0.IsCanonical(regs.Pc) {
return nonCanonical(regs.Pc, int32(unix.SIGSEGV), info)
@@ -246,13 +246,13 @@ func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo)
switch vector {
case ring0.Syscall:
// Fast path: system call executed.
- return usermem.NoAccess, nil
+ return hostarch.NoAccess, nil
case ring0.PageFault:
return c.fault(int32(unix.SIGSEGV), info)
case ring0.El0ErrNMI:
return c.fault(int32(unix.SIGBUS), info)
case ring0.Vector(bounce): // ring0.VirtualizationException.
- return usermem.NoAccess, platform.ErrContextInterrupt
+ return hostarch.NoAccess, platform.ErrContextInterrupt
case ring0.El0SyncUndef:
return c.fault(int32(unix.SIGILL), info)
case ring0.El0SyncDbg:
@@ -261,16 +261,16 @@ func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo)
Code: 1, // TRAP_BRKPT (breakpoint).
}
info.SetAddr(switchOpts.Registers.Pc) // Include address.
- return usermem.AccessType{}, platform.ErrContextSignal
+ return hostarch.AccessType{}, platform.ErrContextSignal
case ring0.El0SyncSpPc:
*info = arch.SignalInfo{
Signo: int32(unix.SIGBUS),
Code: 2, // BUS_ADRERR (physical address does not exist).
}
- return usermem.NoAccess, platform.ErrContextSignal
+ return hostarch.NoAccess, platform.ErrContextSignal
case ring0.El0SyncSys,
ring0.El0SyncWfx:
- return usermem.NoAccess, nil // skip for now.
+ return hostarch.NoAccess, nil // skip for now.
default:
panic(fmt.Sprintf("unexpected vector: 0x%x", vector))
}
diff --git a/pkg/sentry/platform/kvm/physical_map.go b/pkg/sentry/platform/kvm/physical_map.go
index 7376d8b8d..d812e6c26 100644
--- a/pkg/sentry/platform/kvm/physical_map.go
+++ b/pkg/sentry/platform/kvm/physical_map.go
@@ -19,9 +19,9 @@ import (
"sort"
"golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/ring0"
- "gvisor.dev/gvisor/pkg/usermem"
)
type region struct {
@@ -81,7 +81,7 @@ func fillAddressSpace() (excludedRegions []region) {
// faultBlockSize, potentially causing up to faultBlockSize bytes in
// internal fragmentation for each physical region. So we need to
// account for this properly during allocation.
- requiredAddr, ok := usermem.Addr(vSize - pSize + faultBlockSize).RoundUp()
+ requiredAddr, ok := hostarch.Addr(vSize - pSize + faultBlockSize).RoundUp()
if !ok {
panic(fmt.Sprintf(
"overflow for vSize (%x) - pSize (%x) + faultBlockSize (%x)",
@@ -99,7 +99,7 @@ func fillAddressSpace() (excludedRegions []region) {
0, 0)
if errno != 0 {
// Attempt half the size; overflow not possible.
- currentAddr, _ := usermem.Addr(current >> 1).RoundUp()
+ currentAddr, _ := hostarch.Addr(current >> 1).RoundUp()
current = uintptr(currentAddr)
continue
}
@@ -134,8 +134,8 @@ func computePhysicalRegions(excludedRegions []region) (physicalRegions []physica
return
}
if virtual == 0 {
- virtual += usermem.PageSize
- length -= usermem.PageSize
+ virtual += hostarch.PageSize
+ length -= hostarch.PageSize
}
if end := virtual + length; end > ring0.MaximumUserAddress {
length -= (end - ring0.MaximumUserAddress)
diff --git a/pkg/sentry/platform/kvm/virtual_map.go b/pkg/sentry/platform/kvm/virtual_map.go
index 4dcdbf8a7..01d9eb39d 100644
--- a/pkg/sentry/platform/kvm/virtual_map.go
+++ b/pkg/sentry/platform/kvm/virtual_map.go
@@ -22,12 +22,12 @@ import (
"regexp"
"strconv"
- "gvisor.dev/gvisor/pkg/usermem"
+ "gvisor.dev/gvisor/pkg/hostarch"
)
type virtualRegion struct {
region
- accessType usermem.AccessType
+ accessType hostarch.AccessType
shared bool
offset uintptr
filename string
@@ -92,7 +92,7 @@ func applyVirtualRegions(fn func(vr virtualRegion)) error {
virtual: uintptr(start),
length: uintptr(end - start),
},
- accessType: usermem.AccessType{
+ accessType: hostarch.AccessType{
Read: read,
Write: write,
Execute: execute,
diff --git a/pkg/sentry/platform/kvm/virtual_map_test.go b/pkg/sentry/platform/kvm/virtual_map_test.go
index 9b4545fdd..1f4a774f3 100644
--- a/pkg/sentry/platform/kvm/virtual_map_test.go
+++ b/pkg/sentry/platform/kvm/virtual_map_test.go
@@ -18,12 +18,12 @@ import (
"testing"
"golang.org/x/sys/unix"
- "gvisor.dev/gvisor/pkg/usermem"
+ "gvisor.dev/gvisor/pkg/hostarch"
)
type checker struct {
ok bool
- accessType usermem.AccessType
+ accessType hostarch.AccessType
}
func (c *checker) Containing(addr uintptr) func(virtualRegion) {
@@ -46,7 +46,7 @@ func TestParseMaps(t *testing.T) {
// MMap a new page.
addr, _, errno := unix.RawSyscall6(
- unix.SYS_MMAP, 0, usermem.PageSize,
+ unix.SYS_MMAP, 0, hostarch.PageSize,
unix.PROT_READ|unix.PROT_WRITE,
unix.MAP_ANONYMOUS|unix.MAP_PRIVATE, 0, 0)
if errno != 0 {
@@ -55,19 +55,19 @@ func TestParseMaps(t *testing.T) {
// Re-parse maps.
if err := applyVirtualRegions(c.Containing(addr)); err != nil {
- unix.RawSyscall(unix.SYS_MUNMAP, addr, usermem.PageSize, 0)
+ unix.RawSyscall(unix.SYS_MUNMAP, addr, hostarch.PageSize, 0)
t.Fatalf("unexpected error: %v", err)
}
// Assert that it now does contain the region.
if !c.ok {
- unix.RawSyscall(unix.SYS_MUNMAP, addr, usermem.PageSize, 0)
+ unix.RawSyscall(unix.SYS_MUNMAP, addr, hostarch.PageSize, 0)
t.Fatalf("updated map does not contain 0x%08x, expected true", addr)
}
// Map the region as PROT_NONE.
newAddr, _, errno := unix.RawSyscall6(
- unix.SYS_MMAP, addr, usermem.PageSize,
+ unix.SYS_MMAP, addr, hostarch.PageSize,
unix.PROT_NONE,
unix.MAP_ANONYMOUS|unix.MAP_FIXED|unix.MAP_PRIVATE, 0, 0)
if errno != 0 {
@@ -89,5 +89,5 @@ func TestParseMaps(t *testing.T) {
}
// Unmap the region.
- unix.RawSyscall(unix.SYS_MUNMAP, addr, usermem.PageSize, 0)
+ unix.RawSyscall(unix.SYS_MUNMAP, addr, hostarch.PageSize, 0)
}
diff --git a/pkg/sentry/platform/mmap_min_addr.go b/pkg/sentry/platform/mmap_min_addr.go
index 091c2e365..7335bd802 100644
--- a/pkg/sentry/platform/mmap_min_addr.go
+++ b/pkg/sentry/platform/mmap_min_addr.go
@@ -20,7 +20,7 @@ import (
"strconv"
"strings"
- "gvisor.dev/gvisor/pkg/usermem"
+ "gvisor.dev/gvisor/pkg/hostarch"
)
// systemMMapMinAddrSource is the source file.
@@ -30,8 +30,8 @@ const systemMMapMinAddrSource = "/proc/sys/vm/mmap_min_addr"
var systemMMapMinAddr uint64
// SystemMMapMinAddr returns the minimum system address.
-func SystemMMapMinAddr() usermem.Addr {
- return usermem.Addr(systemMMapMinAddr)
+func SystemMMapMinAddr() hostarch.Addr {
+ return hostarch.Addr(systemMMapMinAddr)
}
// MMapMinAddr is a size zero struct that implements MinUserAddress based on
@@ -41,7 +41,7 @@ type MMapMinAddr struct {
}
// MinUserAddress implements platform.MinUserAddresss.
-func (*MMapMinAddr) MinUserAddress() usermem.Addr {
+func (*MMapMinAddr) MinUserAddress() hostarch.Addr {
return SystemMMapMinAddr()
}
diff --git a/pkg/sentry/platform/platform.go b/pkg/sentry/platform/platform.go
index dcfe839a7..ef7814a6f 100644
--- a/pkg/sentry/platform/platform.go
+++ b/pkg/sentry/platform/platform.go
@@ -23,6 +23,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/seccomp"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/hostmm"
@@ -62,16 +63,16 @@ type Platform interface {
// for AddressSpace.MapFile. As a special case, a MapUnit of 0 indicates
// that the cost of AddressSpace.MapFile is effectively independent of the
// number of pages mapped. If MapUnit is non-zero, it must be a power-of-2
- // multiple of usermem.PageSize.
+ // multiple of hostarch.PageSize.
MapUnit() uint64
// MinUserAddress returns the minimum mappable address on this
// platform.
- MinUserAddress() usermem.Addr
+ MinUserAddress() hostarch.Addr
// MaxUserAddress returns the maximum mappable address on this
// platform.
- MaxUserAddress() usermem.Addr
+ MaxUserAddress() hostarch.Addr
// NewAddressSpace returns a new memory context for this platform.
//
@@ -172,7 +173,7 @@ type MemoryManager interface {
//usermem.IO provides access to the contents of a virtual memory space.
usermem.IO
// MMap establishes a memory mapping.
- MMap(ctx context.Context, opts memmap.MMapOpts) (usermem.Addr, error)
+ MMap(ctx context.Context, opts memmap.MMapOpts) (hostarch.Addr, error)
// AddressSpace returns the AddressSpace bound to mm.
AddressSpace() AddressSpace
}
@@ -195,7 +196,7 @@ type Context interface {
//
// - ErrContextSignal: The Context was interrupted by a signal. The
// returned *arch.SignalInfo contains information about the signal. If
- // arch.SignalInfo.Signo == SIGSEGV, the returned usermem.AccessType
+ // arch.SignalInfo.Signo == SIGSEGV, the returned hostarch.AccessType
// contains the access type of the triggering fault. The caller owns
// the returned SignalInfo.
//
@@ -206,7 +207,7 @@ type Context interface {
// concurrent call to Switch().
//
// - ErrContextCPUPreempted: See the definition of that error for details.
- Switch(ctx context.Context, mm MemoryManager, ac arch.Context, cpu int32) (*arch.SignalInfo, usermem.AccessType, error)
+ Switch(ctx context.Context, mm MemoryManager, ac arch.Context, cpu int32) (*arch.SignalInfo, hostarch.AccessType, error)
// PullFullState() pulls a full state of the application thread.
//
@@ -302,14 +303,14 @@ type AddressSpace interface {
// * at.Any() == true.
// * At least one reference must be held on all pages in fr, and must
// continue to be held as long as pages are mapped.
- MapFile(addr usermem.Addr, f memmap.File, fr memmap.FileRange, at usermem.AccessType, precommit bool) error
+ MapFile(addr hostarch.Addr, f memmap.File, fr memmap.FileRange, at hostarch.AccessType, precommit bool) error
// Unmap unmaps the given range.
//
// Preconditions:
// * addr is page-aligned.
// * length > 0.
- Unmap(addr usermem.Addr, length uint64)
+ Unmap(addr hostarch.Addr, length uint64)
// Release releases this address space. After releasing, a new AddressSpace
// must be acquired via platform.NewAddressSpace().
@@ -337,67 +338,67 @@ type AddressSpaceIO interface {
// CopyOut copies len(src) bytes from src to the memory mapped at addr. It
// returns the number of bytes copied. If the number of bytes copied is <
// len(src), it returns a non-nil error explaining why.
- CopyOut(addr usermem.Addr, src []byte) (int, error)
+ CopyOut(addr hostarch.Addr, src []byte) (int, error)
// CopyIn copies len(dst) bytes from the memory mapped at addr to dst.
// It returns the number of bytes copied. If the number of bytes copied is
// < len(dst), it returns a non-nil error explaining why.
- CopyIn(addr usermem.Addr, dst []byte) (int, error)
+ CopyIn(addr hostarch.Addr, dst []byte) (int, error)
// ZeroOut sets toZero bytes to 0, starting at addr. It returns the number
// of bytes zeroed. If the number of bytes zeroed is < toZero, it returns a
// non-nil error explaining why.
- ZeroOut(addr usermem.Addr, toZero uintptr) (uintptr, error)
+ ZeroOut(addr hostarch.Addr, toZero uintptr) (uintptr, error)
// SwapUint32 atomically sets the uint32 value at addr to new and returns
// the previous value.
//
// Preconditions: addr must be aligned to a 4-byte boundary.
- SwapUint32(addr usermem.Addr, new uint32) (uint32, error)
+ SwapUint32(addr hostarch.Addr, new uint32) (uint32, error)
// CompareAndSwapUint32 atomically compares the uint32 value at addr to
// old; if they are equal, the value in memory is replaced by new. In
// either case, the previous value stored in memory is returned.
//
// Preconditions: addr must be aligned to a 4-byte boundary.
- CompareAndSwapUint32(addr usermem.Addr, old, new uint32) (uint32, error)
+ CompareAndSwapUint32(addr hostarch.Addr, old, new uint32) (uint32, error)
// LoadUint32 atomically loads the uint32 value at addr and returns it.
//
// Preconditions: addr must be aligned to a 4-byte boundary.
- LoadUint32(addr usermem.Addr) (uint32, error)
+ LoadUint32(addr hostarch.Addr) (uint32, error)
}
// NoAddressSpaceIO implements AddressSpaceIO methods by panicking.
type NoAddressSpaceIO struct{}
// CopyOut implements AddressSpaceIO.CopyOut.
-func (NoAddressSpaceIO) CopyOut(addr usermem.Addr, src []byte) (int, error) {
+func (NoAddressSpaceIO) CopyOut(addr hostarch.Addr, src []byte) (int, error) {
panic("This platform does not support AddressSpaceIO")
}
// CopyIn implements AddressSpaceIO.CopyIn.
-func (NoAddressSpaceIO) CopyIn(addr usermem.Addr, dst []byte) (int, error) {
+func (NoAddressSpaceIO) CopyIn(addr hostarch.Addr, dst []byte) (int, error) {
panic("This platform does not support AddressSpaceIO")
}
// ZeroOut implements AddressSpaceIO.ZeroOut.
-func (NoAddressSpaceIO) ZeroOut(addr usermem.Addr, toZero uintptr) (uintptr, error) {
+func (NoAddressSpaceIO) ZeroOut(addr hostarch.Addr, toZero uintptr) (uintptr, error) {
panic("This platform does not support AddressSpaceIO")
}
// SwapUint32 implements AddressSpaceIO.SwapUint32.
-func (NoAddressSpaceIO) SwapUint32(addr usermem.Addr, new uint32) (uint32, error) {
+func (NoAddressSpaceIO) SwapUint32(addr hostarch.Addr, new uint32) (uint32, error) {
panic("This platform does not support AddressSpaceIO")
}
// CompareAndSwapUint32 implements AddressSpaceIO.CompareAndSwapUint32.
-func (NoAddressSpaceIO) CompareAndSwapUint32(addr usermem.Addr, old, new uint32) (uint32, error) {
+func (NoAddressSpaceIO) CompareAndSwapUint32(addr hostarch.Addr, old, new uint32) (uint32, error) {
panic("This platform does not support AddressSpaceIO")
}
// LoadUint32 implements AddressSpaceIO.LoadUint32.
-func (NoAddressSpaceIO) LoadUint32(addr usermem.Addr) (uint32, error) {
+func (NoAddressSpaceIO) LoadUint32(addr hostarch.Addr) (uint32, error) {
panic("This platform does not support AddressSpaceIO")
}
@@ -406,7 +407,7 @@ func (NoAddressSpaceIO) LoadUint32(addr usermem.Addr) (uint32, error) {
// permissions.
type SegmentationFault struct {
// Addr is the address at which the fault occurred.
- Addr usermem.Addr
+ Addr hostarch.Addr
}
// Error implements error.Error.
diff --git a/pkg/sentry/platform/ptrace/BUILD b/pkg/sentry/platform/ptrace/BUILD
index 47efde6a2..d101f2f53 100644
--- a/pkg/sentry/platform/ptrace/BUILD
+++ b/pkg/sentry/platform/ptrace/BUILD
@@ -25,6 +25,7 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/hostarch",
"//pkg/log",
"//pkg/procid",
"//pkg/safecopy",
@@ -35,7 +36,6 @@ go_library(
"//pkg/sentry/platform",
"//pkg/sentry/platform/interrupt",
"//pkg/sync",
- "//pkg/usermem",
"@org_golang_x_sys//unix:go_default_library",
],
)
diff --git a/pkg/sentry/platform/ptrace/ptrace.go b/pkg/sentry/platform/ptrace/ptrace.go
index 571bfcc2e..828458ce2 100644
--- a/pkg/sentry/platform/ptrace/ptrace.go
+++ b/pkg/sentry/platform/ptrace/ptrace.go
@@ -49,11 +49,11 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
pkgcontext "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/platform"
"gvisor.dev/gvisor/pkg/sentry/platform/interrupt"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/usermem"
)
var (
@@ -88,28 +88,28 @@ type context struct {
// lastFaultAddr is the last faulting address; this is only meaningful if
// lastFaultSP is non-nil.
- lastFaultAddr usermem.Addr
+ lastFaultAddr hostarch.Addr
// lastFaultIP is the address of the last faulting instruction;
// this is also only meaningful if lastFaultSP is non-nil.
- lastFaultIP usermem.Addr
+ lastFaultIP hostarch.Addr
}
// Switch runs the provided context in the given address space.
-func (c *context) Switch(ctx pkgcontext.Context, mm platform.MemoryManager, ac arch.Context, cpu int32) (*arch.SignalInfo, usermem.AccessType, error) {
+func (c *context) Switch(ctx pkgcontext.Context, mm platform.MemoryManager, ac arch.Context, cpu int32) (*arch.SignalInfo, hostarch.AccessType, error) {
as := mm.AddressSpace()
s := as.(*subprocess)
isSyscall := s.switchToApp(c, ac)
var (
faultSP *subprocess
- faultAddr usermem.Addr
- faultIP usermem.Addr
+ faultAddr hostarch.Addr
+ faultIP hostarch.Addr
)
if !isSyscall && linux.Signal(c.signalInfo.Signo) == linux.SIGSEGV {
faultSP = s
- faultAddr = usermem.Addr(c.signalInfo.Addr())
- faultIP = usermem.Addr(ac.IP())
+ faultAddr = hostarch.Addr(c.signalInfo.Addr())
+ faultIP = hostarch.Addr(ac.IP())
}
// Update the context to reflect the outcome of this context switch.
@@ -140,14 +140,14 @@ func (c *context) Switch(ctx pkgcontext.Context, mm platform.MemoryManager, ac a
}
if isSyscall {
- return nil, usermem.NoAccess, nil
+ return nil, hostarch.NoAccess, nil
}
si := c.signalInfo
if faultSP == nil {
// Non-fault signal.
- return &si, usermem.NoAccess, platform.ErrContextSignal
+ return &si, hostarch.NoAccess, platform.ErrContextSignal
}
// Got a page fault. Ideally, we'd get real fault type here, but ptrace
@@ -157,7 +157,7 @@ func (c *context) Switch(ctx pkgcontext.Context, mm platform.MemoryManager, ac a
// pointer.
//
// It was a write fault if the fault is immediately repeated.
- at := usermem.Read
+ at := hostarch.Read
if faultAddr == faultIP {
at.Execute = true
}
@@ -235,8 +235,8 @@ func (*PTrace) MapUnit() uint64 {
// MaxUserAddress returns the first address that may not be used by user
// applications.
-func (*PTrace) MaxUserAddress() usermem.Addr {
- return usermem.Addr(stubStart)
+func (*PTrace) MaxUserAddress() hostarch.Addr {
+ return hostarch.Addr(stubStart)
}
// NewAddressSpace returns a new subprocess.
diff --git a/pkg/sentry/platform/ptrace/ptrace_unsafe.go b/pkg/sentry/platform/ptrace/ptrace_unsafe.go
index 01e73b019..facb96011 100644
--- a/pkg/sentry/platform/ptrace/ptrace_unsafe.go
+++ b/pkg/sentry/platform/ptrace/ptrace_unsafe.go
@@ -19,9 +19,9 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/arch/fpu"
- "gvisor.dev/gvisor/pkg/usermem"
)
// getRegs gets the general purpose register set.
@@ -122,7 +122,7 @@ func (t *thread) getSignalInfo(si *arch.SignalInfo) error {
//
// Precondition: the OS thread must be locked and own t.
func (t *thread) clone() (*thread, error) {
- r, ok := usermem.Addr(stackPointer(&t.initRegs)).RoundUp()
+ r, ok := hostarch.Addr(stackPointer(&t.initRegs)).RoundUp()
if !ok {
return nil, unix.EINVAL
}
diff --git a/pkg/sentry/platform/ptrace/stub_unsafe.go b/pkg/sentry/platform/ptrace/stub_unsafe.go
index 780227248..5c9b7784f 100644
--- a/pkg/sentry/platform/ptrace/stub_unsafe.go
+++ b/pkg/sentry/platform/ptrace/stub_unsafe.go
@@ -19,8 +19,8 @@ import (
"unsafe"
"golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/safecopy"
- "gvisor.dev/gvisor/pkg/usermem"
)
// stub is defined in arch-specific assembly.
@@ -45,8 +45,8 @@ func stubInit() {
stubLen := int(safecopy.FindEndAddress(stubBegin) - stubBegin)
stubSlice := unsafeSlice(stubBegin, stubLen)
mapLen := uintptr(stubLen)
- if offset := mapLen % usermem.PageSize; offset != 0 {
- mapLen += usermem.PageSize - offset
+ if offset := mapLen % hostarch.PageSize; offset != 0 {
+ mapLen += hostarch.PageSize - offset
}
for stubStart > 0 {
@@ -70,7 +70,7 @@ func stubInit() {
}
// Attempt to begin at a lower address.
- stubStart -= uintptr(usermem.PageSize)
+ stubStart -= uintptr(hostarch.PageSize)
continue
}
diff --git a/pkg/sentry/platform/ptrace/subprocess.go b/pkg/sentry/platform/ptrace/subprocess.go
index acccbfe2e..9c73a725a 100644
--- a/pkg/sentry/platform/ptrace/subprocess.go
+++ b/pkg/sentry/platform/ptrace/subprocess.go
@@ -20,13 +20,13 @@ import (
"runtime"
"golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/procid"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/platform"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/usermem"
)
// Linux kernel errnos which "should never be seen by user programs", but will
@@ -69,7 +69,7 @@ type thread struct {
// threadPool is a collection of threads.
type threadPool struct {
// mu protects below.
- mu sync.Mutex
+ mu sync.RWMutex
// threads is the collection of threads.
//
@@ -85,30 +85,42 @@ type threadPool struct {
//
// Precondition: the runtime OS thread must be locked.
func (tp *threadPool) lookupOrCreate(currentTID int32, newThread func() *thread) *thread {
- tp.mu.Lock()
+ // The overwhelmingly common case is that the thread is already created.
+ // Optimistically attempt the lookup by only locking for reading.
+ tp.mu.RLock()
t, ok := tp.threads[currentTID]
- if !ok {
- // Before creating a new thread, see if we can find a thread
- // whose system tid has disappeared.
- //
- // TODO(b/77216482): Other parts of this package depend on
- // threads never exiting.
- for origTID, t := range tp.threads {
- // Signal zero is an easy existence check.
- if err := unix.Tgkill(unix.Getpid(), int(origTID), 0); err != nil {
- // This thread has been abandoned; reuse it.
- delete(tp.threads, origTID)
- tp.threads[currentTID] = t
- tp.mu.Unlock()
- return t
- }
- }
+ tp.mu.RUnlock()
+ if ok {
+ return t
+ }
- // Create a new thread.
- t = newThread()
- tp.threads[currentTID] = t
+ tp.mu.Lock()
+ defer tp.mu.Unlock()
+
+ // Another goroutine might have created the thread for currentTID in between
+ // mu.RUnlock() and mu.Lock().
+ if t, ok = tp.threads[currentTID]; ok {
+ return t
+ }
+
+ // Before creating a new thread, see if we can find a thread
+ // whose system tid has disappeared.
+ //
+ // TODO(b/77216482): Other parts of this package depend on
+ // threads never exiting.
+ for origTID, t := range tp.threads {
+ // Signal zero is an easy existence check.
+ if err := unix.Tgkill(unix.Getpid(), int(origTID), 0); err != nil {
+ // This thread has been abandoned; reuse it.
+ delete(tp.threads, origTID)
+ tp.threads[currentTID] = t
+ return t
+ }
}
- tp.mu.Unlock()
+
+ // Create a new thread.
+ t = newThread()
+ tp.threads[currentTID] = t
return t
}
@@ -228,7 +240,7 @@ func newSubprocess(create func() (*thread, error)) (*subprocess, error) {
func (s *subprocess) unmap() {
s.Unmap(0, uint64(stubStart))
if maximumUserAddress != stubEnd {
- s.Unmap(usermem.Addr(stubEnd), uint64(maximumUserAddress-stubEnd))
+ s.Unmap(hostarch.Addr(stubEnd), uint64(maximumUserAddress-stubEnd))
}
}
@@ -615,7 +627,7 @@ func (s *subprocess) syscall(sysno uintptr, args ...arch.SyscallArgument) (uintp
}
// MapFile implements platform.AddressSpace.MapFile.
-func (s *subprocess) MapFile(addr usermem.Addr, f memmap.File, fr memmap.FileRange, at usermem.AccessType, precommit bool) error {
+func (s *subprocess) MapFile(addr hostarch.Addr, f memmap.File, fr memmap.FileRange, at hostarch.AccessType, precommit bool) error {
var flags int
if precommit {
flags |= unix.MAP_POPULATE
@@ -632,7 +644,7 @@ func (s *subprocess) MapFile(addr usermem.Addr, f memmap.File, fr memmap.FileRan
}
// Unmap implements platform.AddressSpace.Unmap.
-func (s *subprocess) Unmap(addr usermem.Addr, length uint64) {
+func (s *subprocess) Unmap(addr hostarch.Addr, length uint64) {
ar, ok := addr.ToRange(length)
if !ok {
panic(fmt.Sprintf("addr %#x + length %#x overflows", addr, length))
diff --git a/pkg/sentry/socket/BUILD b/pkg/sentry/socket/BUILD
index 0ce42b6cc..080859125 100644
--- a/pkg/sentry/socket/BUILD
+++ b/pkg/sentry/socket/BUILD
@@ -10,6 +10,7 @@ go_library(
"//pkg/abi/linux",
"//pkg/binary",
"//pkg/context",
+ "//pkg/hostarch",
"//pkg/marshal",
"//pkg/sentry/device",
"//pkg/sentry/fs",
diff --git a/pkg/sentry/socket/control/BUILD b/pkg/sentry/socket/control/BUILD
index ebcc891b3..0e0e82365 100644
--- a/pkg/sentry/socket/control/BUILD
+++ b/pkg/sentry/socket/control/BUILD
@@ -16,6 +16,7 @@ go_library(
"//pkg/abi/linux",
"//pkg/binary",
"//pkg/context",
+ "//pkg/hostarch",
"//pkg/sentry/fs",
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/auth",
@@ -23,7 +24,6 @@ go_library(
"//pkg/sentry/socket/unix/transport",
"//pkg/sentry/vfs",
"//pkg/syserror",
- "//pkg/usermem",
],
)
@@ -35,8 +35,8 @@ go_test(
deps = [
"//pkg/abi/linux",
"//pkg/binary",
+ "//pkg/hostarch",
"//pkg/sentry/socket",
- "//pkg/usermem",
"@com_github_google_go_cmp//cmp:go_default_library",
],
)
diff --git a/pkg/sentry/socket/control/control.go b/pkg/sentry/socket/control/control.go
index 65b556489..45a05cd63 100644
--- a/pkg/sentry/socket/control/control.go
+++ b/pkg/sentry/socket/control/control.go
@@ -20,13 +20,13 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/binary"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/socket"
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
const maxInt = int(^uint(0) >> 1)
@@ -181,12 +181,12 @@ func (c *scmCredentials) Equals(oc transport.CredentialsControlMessage) bool {
}
func putUint64(buf []byte, n uint64) []byte {
- usermem.ByteOrder.PutUint64(buf[len(buf):len(buf)+8], n)
+ hostarch.ByteOrder.PutUint64(buf[len(buf):len(buf)+8], n)
return buf[:len(buf)+8]
}
func putUint32(buf []byte, n uint32) []byte {
- usermem.ByteOrder.PutUint32(buf[len(buf):len(buf)+4], n)
+ hostarch.ByteOrder.PutUint32(buf[len(buf):len(buf)+4], n)
return buf[:len(buf)+4]
}
@@ -242,7 +242,7 @@ func putCmsgStruct(buf []byte, msgLevel, msgType uint32, align uint, data interf
hdrBuf := buf
- buf = binary.Marshal(buf, usermem.ByteOrder, data)
+ buf = binary.Marshal(buf, hostarch.ByteOrder, data)
// If the control message data brought us over capacity, omit it.
if cap(buf) != cap(ob) {
@@ -475,7 +475,7 @@ func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte, width uint)
}
var h linux.ControlMessageHeader
- binary.Unmarshal(buf[i:i+linux.SizeOfControlMessageHeader], usermem.ByteOrder, &h)
+ binary.Unmarshal(buf[i:i+linux.SizeOfControlMessageHeader], hostarch.ByteOrder, &h)
if h.Length < uint64(linux.SizeOfControlMessageHeader) {
return socket.ControlMessages{}, syserror.EINVAL
@@ -499,7 +499,7 @@ func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte, width uint)
}
for j := i; j < i+rightsSize; j += linux.SizeOfControlMessageRight {
- fds = append(fds, int32(usermem.ByteOrder.Uint32(buf[j:j+linux.SizeOfControlMessageRight])))
+ fds = append(fds, int32(hostarch.ByteOrder.Uint32(buf[j:j+linux.SizeOfControlMessageRight])))
}
i += binary.AlignUp(length, width)
@@ -510,7 +510,7 @@ func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte, width uint)
}
var creds linux.ControlMessageCredentials
- binary.Unmarshal(buf[i:i+linux.SizeOfControlMessageCredentials], usermem.ByteOrder, &creds)
+ binary.Unmarshal(buf[i:i+linux.SizeOfControlMessageCredentials], hostarch.ByteOrder, &creds)
scmCreds, err := NewSCMCredentials(t, creds)
if err != nil {
return socket.ControlMessages{}, err
@@ -523,7 +523,7 @@ func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte, width uint)
return socket.ControlMessages{}, syserror.EINVAL
}
var ts linux.Timeval
- binary.Unmarshal(buf[i:i+linux.SizeOfTimeval], usermem.ByteOrder, &ts)
+ binary.Unmarshal(buf[i:i+linux.SizeOfTimeval], hostarch.ByteOrder, &ts)
cmsgs.IP.Timestamp = ts.ToNsecCapped()
cmsgs.IP.HasTimestamp = true
i += binary.AlignUp(length, width)
@@ -539,7 +539,7 @@ func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte, width uint)
return socket.ControlMessages{}, syserror.EINVAL
}
cmsgs.IP.HasTOS = true
- binary.Unmarshal(buf[i:i+linux.SizeOfControlMessageTOS], usermem.ByteOrder, &cmsgs.IP.TOS)
+ binary.Unmarshal(buf[i:i+linux.SizeOfControlMessageTOS], hostarch.ByteOrder, &cmsgs.IP.TOS)
i += binary.AlignUp(length, width)
case linux.IP_PKTINFO:
@@ -549,7 +549,7 @@ func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte, width uint)
cmsgs.IP.HasIPPacketInfo = true
var packetInfo linux.ControlMessageIPPacketInfo
- binary.Unmarshal(buf[i:i+linux.SizeOfControlMessageIPPacketInfo], usermem.ByteOrder, &packetInfo)
+ binary.Unmarshal(buf[i:i+linux.SizeOfControlMessageIPPacketInfo], hostarch.ByteOrder, &packetInfo)
cmsgs.IP.PacketInfo = packetInfo
i += binary.AlignUp(length, width)
@@ -559,7 +559,7 @@ func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte, width uint)
if length < addr.SizeBytes() {
return socket.ControlMessages{}, syserror.EINVAL
}
- binary.Unmarshal(buf[i:i+addr.SizeBytes()], usermem.ByteOrder, &addr)
+ binary.Unmarshal(buf[i:i+addr.SizeBytes()], hostarch.ByteOrder, &addr)
cmsgs.IP.OriginalDstAddress = &addr
i += binary.AlignUp(length, width)
@@ -583,7 +583,7 @@ func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte, width uint)
return socket.ControlMessages{}, syserror.EINVAL
}
cmsgs.IP.HasTClass = true
- binary.Unmarshal(buf[i:i+linux.SizeOfControlMessageTClass], usermem.ByteOrder, &cmsgs.IP.TClass)
+ binary.Unmarshal(buf[i:i+linux.SizeOfControlMessageTClass], hostarch.ByteOrder, &cmsgs.IP.TClass)
i += binary.AlignUp(length, width)
case linux.IPV6_RECVORIGDSTADDR:
@@ -591,7 +591,7 @@ func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte, width uint)
if length < addr.SizeBytes() {
return socket.ControlMessages{}, syserror.EINVAL
}
- binary.Unmarshal(buf[i:i+addr.SizeBytes()], usermem.ByteOrder, &addr)
+ binary.Unmarshal(buf[i:i+addr.SizeBytes()], hostarch.ByteOrder, &addr)
cmsgs.IP.OriginalDstAddress = &addr
i += binary.AlignUp(length, width)
diff --git a/pkg/sentry/socket/control/control_test.go b/pkg/sentry/socket/control/control_test.go
index d40a4cc85..7e28a0cef 100644
--- a/pkg/sentry/socket/control/control_test.go
+++ b/pkg/sentry/socket/control/control_test.go
@@ -22,8 +22,8 @@ import (
"github.com/google/go-cmp/cmp"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/socket"
- "gvisor.dev/gvisor/pkg/usermem"
)
func TestParse(t *testing.T) {
@@ -35,12 +35,12 @@ func TestParse(t *testing.T) {
Type: linux.SO_TIMESTAMP,
}
buf := make([]byte, 0, length)
- buf = binary.Marshal(buf, usermem.ByteOrder, &hdr)
+ buf = binary.Marshal(buf, hostarch.ByteOrder, &hdr)
ts := linux.Timeval{
Sec: 2401,
Usec: 343,
}
- buf = binary.Marshal(buf, usermem.ByteOrder, &ts)
+ buf = binary.Marshal(buf, hostarch.ByteOrder, &ts)
cmsg, err := Parse(nil, nil, buf, 8 /* width */)
if err != nil {
diff --git a/pkg/sentry/socket/hostinet/BUILD b/pkg/sentry/socket/hostinet/BUILD
index a8e6f172b..a5c2155a2 100644
--- a/pkg/sentry/socket/hostinet/BUILD
+++ b/pkg/sentry/socket/hostinet/BUILD
@@ -20,6 +20,7 @@ go_library(
"//pkg/binary",
"//pkg/context",
"//pkg/fdnotifier",
+ "//pkg/hostarch",
"//pkg/log",
"//pkg/marshal",
"//pkg/marshal/primitive",
diff --git a/pkg/sentry/socket/hostinet/socket.go b/pkg/sentry/socket/hostinet/socket.go
index 2d9dbbdba..a784e23b5 100644
--- a/pkg/sentry/socket/hostinet/socket.go
+++ b/pkg/sentry/socket/hostinet/socket.go
@@ -22,6 +22,7 @@ import (
"gvisor.dev/gvisor/pkg/binary"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fdnotifier"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/marshal"
"gvisor.dev/gvisor/pkg/marshal/primitive"
@@ -321,7 +322,7 @@ func (s *socketOpsCommon) Shutdown(t *kernel.Task, how int) *syserr.Error {
}
// GetSockOpt implements socket.Socket.GetSockOpt.
-func (s *socketOpsCommon) GetSockOpt(t *kernel.Task, level int, name int, outPtr usermem.Addr, outLen int) (marshal.Marshallable, *syserr.Error) {
+func (s *socketOpsCommon) GetSockOpt(t *kernel.Task, level int, name int, outPtr hostarch.Addr, outLen int) (marshal.Marshallable, *syserr.Error) {
if outLen < 0 {
return nil, syserr.ErrInvalidArgument
}
@@ -527,24 +528,24 @@ func parseUnixControlMessages(unixControlMessages []unix.SocketControlMessage) s
switch unixCmsg.Header.Type {
case linux.SO_TIMESTAMP:
controlMessages.IP.HasTimestamp = true
- binary.Unmarshal(unixCmsg.Data[:linux.SizeOfTimeval], usermem.ByteOrder, &controlMessages.IP.Timestamp)
+ binary.Unmarshal(unixCmsg.Data[:linux.SizeOfTimeval], hostarch.ByteOrder, &controlMessages.IP.Timestamp)
}
case linux.SOL_IP:
switch unixCmsg.Header.Type {
case linux.IP_TOS:
controlMessages.IP.HasTOS = true
- binary.Unmarshal(unixCmsg.Data[:linux.SizeOfControlMessageTOS], usermem.ByteOrder, &controlMessages.IP.TOS)
+ binary.Unmarshal(unixCmsg.Data[:linux.SizeOfControlMessageTOS], hostarch.ByteOrder, &controlMessages.IP.TOS)
case linux.IP_PKTINFO:
controlMessages.IP.HasIPPacketInfo = true
var packetInfo linux.ControlMessageIPPacketInfo
- binary.Unmarshal(unixCmsg.Data[:linux.SizeOfControlMessageIPPacketInfo], usermem.ByteOrder, &packetInfo)
+ binary.Unmarshal(unixCmsg.Data[:linux.SizeOfControlMessageIPPacketInfo], hostarch.ByteOrder, &packetInfo)
controlMessages.IP.PacketInfo = packetInfo
case linux.IP_RECVORIGDSTADDR:
var addr linux.SockAddrInet
- binary.Unmarshal(unixCmsg.Data[:addr.SizeBytes()], usermem.ByteOrder, &addr)
+ binary.Unmarshal(unixCmsg.Data[:addr.SizeBytes()], hostarch.ByteOrder, &addr)
controlMessages.IP.OriginalDstAddress = &addr
case unix.IP_RECVERR:
@@ -557,11 +558,11 @@ func parseUnixControlMessages(unixControlMessages []unix.SocketControlMessage) s
switch unixCmsg.Header.Type {
case linux.IPV6_TCLASS:
controlMessages.IP.HasTClass = true
- binary.Unmarshal(unixCmsg.Data[:linux.SizeOfControlMessageTClass], usermem.ByteOrder, &controlMessages.IP.TClass)
+ binary.Unmarshal(unixCmsg.Data[:linux.SizeOfControlMessageTClass], hostarch.ByteOrder, &controlMessages.IP.TClass)
case linux.IPV6_RECVORIGDSTADDR:
var addr linux.SockAddrInet6
- binary.Unmarshal(unixCmsg.Data[:addr.SizeBytes()], usermem.ByteOrder, &addr)
+ binary.Unmarshal(unixCmsg.Data[:addr.SizeBytes()], hostarch.ByteOrder, &addr)
controlMessages.IP.OriginalDstAddress = &addr
case unix.IPV6_RECVERR:
@@ -574,7 +575,7 @@ func parseUnixControlMessages(unixControlMessages []unix.SocketControlMessage) s
switch unixCmsg.Header.Type {
case linux.TCP_INQ:
controlMessages.IP.HasInq = true
- binary.Unmarshal(unixCmsg.Data[:linux.SizeOfControlMessageInq], usermem.ByteOrder, &controlMessages.IP.Inq)
+ binary.Unmarshal(unixCmsg.Data[:linux.SizeOfControlMessageInq], hostarch.ByteOrder, &controlMessages.IP.Inq)
}
}
}
@@ -688,7 +689,7 @@ func (s *socketOpsCommon) State() uint32 {
return 0
}
- binary.Unmarshal(buf, usermem.ByteOrder, &info)
+ binary.Unmarshal(buf, hostarch.ByteOrder, &info)
return uint32(info.State)
}
diff --git a/pkg/sentry/socket/hostinet/socket_unsafe.go b/pkg/sentry/socket/hostinet/socket_unsafe.go
index 2890e640d..d3be2d825 100644
--- a/pkg/sentry/socket/hostinet/socket_unsafe.go
+++ b/pkg/sentry/socket/hostinet/socket_unsafe.go
@@ -20,6 +20,7 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/socket"
@@ -61,7 +62,7 @@ func ioctl(ctx context.Context, fd int, io usermem.IO, args arch.SyscallArgument
return 0, translateIOSyscallError(errno)
}
var buf [4]byte
- usermem.ByteOrder.PutUint32(buf[:], uint32(val))
+ hostarch.ByteOrder.PutUint32(buf[:], uint32(val))
_, err := io.CopyOut(ctx, args[2].Pointer(), buf[:], usermem.IOOpts{
AddressSpaceActive: true,
})
diff --git a/pkg/sentry/socket/hostinet/stack.go b/pkg/sentry/socket/hostinet/stack.go
index 5bcf92e14..26e8ae17a 100644
--- a/pkg/sentry/socket/hostinet/stack.go
+++ b/pkg/sentry/socket/hostinet/stack.go
@@ -22,11 +22,13 @@ import (
"reflect"
"strconv"
"strings"
+
"syscall"
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/binary"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/inet"
"gvisor.dev/gvisor/pkg/syserr"
@@ -146,7 +148,7 @@ func ExtractHostInterfaces(links []syscall.NetlinkMessage, addrs []syscall.Netli
return fmt.Errorf("RTM_GETLINK returned RTM_NEWLINK message with invalid data length (%d bytes, expected at least %d bytes)", len(link.Data), unix.SizeofIfInfomsg)
}
var ifinfo unix.IfInfomsg
- binary.Unmarshal(link.Data[:unix.SizeofIfInfomsg], usermem.ByteOrder, &ifinfo)
+ binary.Unmarshal(link.Data[:unix.SizeofIfInfomsg], hostarch.ByteOrder, &ifinfo)
inetIF := inet.Interface{
DeviceType: ifinfo.Type,
Flags: ifinfo.Flags,
@@ -177,7 +179,7 @@ func ExtractHostInterfaces(links []syscall.NetlinkMessage, addrs []syscall.Netli
return fmt.Errorf("RTM_GETADDR returned RTM_NEWADDR message with invalid data length (%d bytes, expected at least %d bytes)", len(addr.Data), unix.SizeofIfAddrmsg)
}
var ifaddr unix.IfAddrmsg
- binary.Unmarshal(addr.Data[:unix.SizeofIfAddrmsg], usermem.ByteOrder, &ifaddr)
+ binary.Unmarshal(addr.Data[:unix.SizeofIfAddrmsg], hostarch.ByteOrder, &ifaddr)
inetAddr := inet.InterfaceAddr{
Family: ifaddr.Family,
PrefixLen: ifaddr.Prefixlen,
@@ -209,7 +211,7 @@ func ExtractHostRoutes(routeMsgs []syscall.NetlinkMessage) ([]inet.Route, error)
}
var ifRoute unix.RtMsg
- binary.Unmarshal(routeMsg.Data[:unix.SizeofRtMsg], usermem.ByteOrder, &ifRoute)
+ binary.Unmarshal(routeMsg.Data[:unix.SizeofRtMsg], hostarch.ByteOrder, &ifRoute)
inetRoute := inet.Route{
Family: ifRoute.Family,
DstLen: ifRoute.Dst_len,
@@ -243,7 +245,7 @@ func ExtractHostRoutes(routeMsgs []syscall.NetlinkMessage) ([]inet.Route, error)
if len(attr.Value) != expected {
return nil, fmt.Errorf("RTM_GETROUTE returned RTM_NEWROUTE message with invalid attribute data length (%d bytes, expected %d bytes)", len(attr.Value), expected)
}
- binary.Unmarshal(attr.Value, usermem.ByteOrder, &inetRoute.OutputInterface)
+ binary.Unmarshal(attr.Value, hostarch.ByteOrder, &inetRoute.OutputInterface)
}
}
diff --git a/pkg/sentry/socket/netfilter/BUILD b/pkg/sentry/socket/netfilter/BUILD
index 8aea0200f..4381dfa06 100644
--- a/pkg/sentry/socket/netfilter/BUILD
+++ b/pkg/sentry/socket/netfilter/BUILD
@@ -20,12 +20,12 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/binary",
+ "//pkg/hostarch",
"//pkg/log",
"//pkg/sentry/kernel",
"//pkg/syserr",
"//pkg/tcpip",
"//pkg/tcpip/header",
"//pkg/tcpip/stack",
- "//pkg/usermem",
],
)
diff --git a/pkg/sentry/socket/netfilter/extensions.go b/pkg/sentry/socket/netfilter/extensions.go
index e339f9bea..4bd305a44 100644
--- a/pkg/sentry/socket/netfilter/extensions.go
+++ b/pkg/sentry/socket/netfilter/extensions.go
@@ -19,10 +19,10 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/syserr"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/stack"
- "gvisor.dev/gvisor/pkg/usermem"
)
// TODO(gvisor.dev/issue/170): The following per-matcher params should be
@@ -89,7 +89,7 @@ func marshalEntryMatch(name string, data []byte) []byte {
copy(matcher.Name[:], name)
buf := make([]byte, 0, size)
- buf = binary.Marshal(buf, usermem.ByteOrder, matcher)
+ buf = binary.Marshal(buf, hostarch.ByteOrder, matcher)
return append(buf, make([]byte, size-len(buf))...)
}
diff --git a/pkg/sentry/socket/netfilter/ipv4.go b/pkg/sentry/socket/netfilter/ipv4.go
index 2f913787b..1fc4cb651 100644
--- a/pkg/sentry/socket/netfilter/ipv4.go
+++ b/pkg/sentry/socket/netfilter/ipv4.go
@@ -19,11 +19,11 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/syserr"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/tcpip/stack"
- "gvisor.dev/gvisor/pkg/usermem"
)
// emptyIPv4Filter is for comparison with a rule's filters to determine whether
@@ -142,7 +142,7 @@ func modifyEntries4(stk *stack.Stack, optVal []byte, replace *linux.IPTReplace,
}
var entry linux.IPTEntry
buf := optVal[:linux.SizeOfIPTEntry]
- binary.Unmarshal(buf, usermem.ByteOrder, &entry)
+ binary.Unmarshal(buf, hostarch.ByteOrder, &entry)
initialOptValLen := len(optVal)
optVal = optVal[linux.SizeOfIPTEntry:]
diff --git a/pkg/sentry/socket/netfilter/ipv6.go b/pkg/sentry/socket/netfilter/ipv6.go
index 263d9d3b5..67a52b628 100644
--- a/pkg/sentry/socket/netfilter/ipv6.go
+++ b/pkg/sentry/socket/netfilter/ipv6.go
@@ -19,11 +19,11 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/syserr"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/tcpip/stack"
- "gvisor.dev/gvisor/pkg/usermem"
)
// emptyIPv6Filter is for comparison with a rule's filters to determine whether
@@ -145,7 +145,7 @@ func modifyEntries6(stk *stack.Stack, optVal []byte, replace *linux.IPTReplace,
}
var entry linux.IP6TEntry
buf := optVal[:linux.SizeOfIP6TEntry]
- binary.Unmarshal(buf, usermem.ByteOrder, &entry)
+ binary.Unmarshal(buf, hostarch.ByteOrder, &entry)
initialOptValLen := len(optVal)
optVal = optVal[linux.SizeOfIP6TEntry:]
diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go
index 7ae18b2a3..5200e08ed 100644
--- a/pkg/sentry/socket/netfilter/netfilter.go
+++ b/pkg/sentry/socket/netfilter/netfilter.go
@@ -23,12 +23,12 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/syserr"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/stack"
- "gvisor.dev/gvisor/pkg/usermem"
)
// enableLogging controls whether to log the (de)serialization of netfilter
@@ -83,7 +83,7 @@ func DefaultLinuxTables() *stack.IPTables {
}
// GetInfo returns information about iptables.
-func GetInfo(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr, ipv6 bool) (linux.IPTGetinfo, *syserr.Error) {
+func GetInfo(t *kernel.Task, stack *stack.Stack, outPtr hostarch.Addr, ipv6 bool) (linux.IPTGetinfo, *syserr.Error) {
// Read in the struct and table name.
var info linux.IPTGetinfo
if _, err := info.CopyIn(t, outPtr); err != nil {
@@ -106,7 +106,7 @@ func GetInfo(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr, ipv6 bool)
}
// GetEntries4 returns netstack's iptables rules.
-func GetEntries4(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr, outLen int) (linux.KernelIPTGetEntries, *syserr.Error) {
+func GetEntries4(t *kernel.Task, stack *stack.Stack, outPtr hostarch.Addr, outLen int) (linux.KernelIPTGetEntries, *syserr.Error) {
// Read in the struct and table name.
var userEntries linux.IPTGetEntries
if _, err := userEntries.CopyIn(t, outPtr); err != nil {
@@ -130,7 +130,7 @@ func GetEntries4(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr, outLen
}
// GetEntries6 returns netstack's ip6tables rules.
-func GetEntries6(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr, outLen int) (linux.KernelIP6TGetEntries, *syserr.Error) {
+func GetEntries6(t *kernel.Task, stack *stack.Stack, outPtr hostarch.Addr, outLen int) (linux.KernelIP6TGetEntries, *syserr.Error) {
// Read in the struct and table name. IPv4 and IPv6 utilize structs
// with the same layout.
var userEntries linux.IPTGetEntries
@@ -179,7 +179,7 @@ func SetEntries(stk *stack.Stack, optVal []byte, ipv6 bool) *syserr.Error {
var replace linux.IPTReplace
replaceBuf := optVal[:linux.SizeOfIPTReplace]
optVal = optVal[linux.SizeOfIPTReplace:]
- binary.Unmarshal(replaceBuf, usermem.ByteOrder, &replace)
+ binary.Unmarshal(replaceBuf, hostarch.ByteOrder, &replace)
// TODO(gvisor.dev/issue/170): Support other tables.
var table stack.Table
@@ -310,7 +310,7 @@ func parseMatchers(filter stack.IPHeaderFilter, optVal []byte) ([]stack.Matcher,
}
var match linux.XTEntryMatch
buf := optVal[:linux.SizeOfXTEntryMatch]
- binary.Unmarshal(buf, usermem.ByteOrder, &match)
+ binary.Unmarshal(buf, hostarch.ByteOrder, &match)
nflog("set entries: parsed entry match %q: %+v", match.Name.String(), match)
// Check some invariants.
@@ -381,7 +381,7 @@ func hookFromLinux(hook int) stack.Hook {
// TargetRevision returns a linux.XTGetRevision for a given target. It sets
// Revision to the highest supported value, unless the provided revision number
// is larger.
-func TargetRevision(t *kernel.Task, revPtr usermem.Addr, netProto tcpip.NetworkProtocolNumber) (linux.XTGetRevision, *syserr.Error) {
+func TargetRevision(t *kernel.Task, revPtr hostarch.Addr, netProto tcpip.NetworkProtocolNumber) (linux.XTGetRevision, *syserr.Error) {
// Read in the target name and version.
var rev linux.XTGetRevision
if _, err := rev.CopyIn(t, revPtr); err != nil {
diff --git a/pkg/sentry/socket/netfilter/owner_matcher.go b/pkg/sentry/socket/netfilter/owner_matcher.go
index 5f80d82ea..b2cc6be20 100644
--- a/pkg/sentry/socket/netfilter/owner_matcher.go
+++ b/pkg/sentry/socket/netfilter/owner_matcher.go
@@ -19,8 +19,8 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/tcpip/stack"
- "gvisor.dev/gvisor/pkg/usermem"
)
const matcherNameOwner = "owner"
@@ -60,7 +60,7 @@ func (ownerMarshaler) marshal(mr matcher) []byte {
}
buf := make([]byte, 0, linux.SizeOfIPTOwnerInfo)
- return marshalEntryMatch(matcherNameOwner, binary.Marshal(buf, usermem.ByteOrder, iptOwnerInfo))
+ return marshalEntryMatch(matcherNameOwner, binary.Marshal(buf, hostarch.ByteOrder, iptOwnerInfo))
}
// unmarshal implements matchMaker.unmarshal.
@@ -72,7 +72,7 @@ func (ownerMarshaler) unmarshal(buf []byte, filter stack.IPHeaderFilter) (stack.
// For alignment reasons, the match's total size may
// exceed what's strictly necessary to hold matchData.
var matchData linux.IPTOwnerInfo
- binary.Unmarshal(buf[:linux.SizeOfIPTOwnerInfo], usermem.ByteOrder, &matchData)
+ binary.Unmarshal(buf[:linux.SizeOfIPTOwnerInfo], hostarch.ByteOrder, &matchData)
nflog("parseMatchers: parsed IPTOwnerInfo: %+v", matchData)
var owner OwnerMatcher
diff --git a/pkg/sentry/socket/netfilter/targets.go b/pkg/sentry/socket/netfilter/targets.go
index f2653d523..80f8c6430 100644
--- a/pkg/sentry/socket/netfilter/targets.go
+++ b/pkg/sentry/socket/netfilter/targets.go
@@ -19,11 +19,11 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/syserr"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/tcpip/stack"
- "gvisor.dev/gvisor/pkg/usermem"
)
// ErrorTargetName is used to mark targets as error targets. Error targets
@@ -167,7 +167,7 @@ func (*standardTargetMaker) marshal(target target) []byte {
}
ret := make([]byte, 0, linux.SizeOfXTStandardTarget)
- return binary.Marshal(ret, usermem.ByteOrder, xt)
+ return binary.Marshal(ret, hostarch.ByteOrder, xt)
}
func (*standardTargetMaker) unmarshal(buf []byte, filter stack.IPHeaderFilter) (target, *syserr.Error) {
@@ -177,7 +177,7 @@ func (*standardTargetMaker) unmarshal(buf []byte, filter stack.IPHeaderFilter) (
}
var standardTarget linux.XTStandardTarget
buf = buf[:linux.SizeOfXTStandardTarget]
- binary.Unmarshal(buf, usermem.ByteOrder, &standardTarget)
+ binary.Unmarshal(buf, hostarch.ByteOrder, &standardTarget)
if standardTarget.Verdict < 0 {
// A Verdict < 0 indicates a non-jump verdict.
@@ -223,7 +223,7 @@ func (*errorTargetMaker) marshal(target target) []byte {
copy(xt.Target.Name[:], ErrorTargetName)
ret := make([]byte, 0, linux.SizeOfXTErrorTarget)
- return binary.Marshal(ret, usermem.ByteOrder, xt)
+ return binary.Marshal(ret, hostarch.ByteOrder, xt)
}
func (*errorTargetMaker) unmarshal(buf []byte, filter stack.IPHeaderFilter) (target, *syserr.Error) {
@@ -233,7 +233,7 @@ func (*errorTargetMaker) unmarshal(buf []byte, filter stack.IPHeaderFilter) (tar
}
var errTgt linux.XTErrorTarget
buf = buf[:linux.SizeOfXTErrorTarget]
- binary.Unmarshal(buf, usermem.ByteOrder, &errTgt)
+ binary.Unmarshal(buf, hostarch.ByteOrder, &errTgt)
// Error targets are used in 2 cases:
// * An actual error case. These rules have an error named
@@ -281,7 +281,7 @@ func (*redirectTargetMaker) marshal(target target) []byte {
xt.NfRange.RangeIPV4.Flags |= linux.NF_NAT_RANGE_PROTO_SPECIFIED
xt.NfRange.RangeIPV4.MinPort = htons(rt.Port)
xt.NfRange.RangeIPV4.MaxPort = xt.NfRange.RangeIPV4.MinPort
- return binary.Marshal(ret, usermem.ByteOrder, xt)
+ return binary.Marshal(ret, hostarch.ByteOrder, xt)
}
func (*redirectTargetMaker) unmarshal(buf []byte, filter stack.IPHeaderFilter) (target, *syserr.Error) {
@@ -297,7 +297,7 @@ func (*redirectTargetMaker) unmarshal(buf []byte, filter stack.IPHeaderFilter) (
var rt linux.XTRedirectTarget
buf = buf[:linux.SizeOfXTRedirectTarget]
- binary.Unmarshal(buf, usermem.ByteOrder, &rt)
+ binary.Unmarshal(buf, hostarch.ByteOrder, &rt)
// Copy linux.XTRedirectTarget to stack.RedirectTarget.
target := redirectTarget{RedirectTarget: stack.RedirectTarget{
@@ -372,7 +372,7 @@ func (*nfNATTargetMaker) marshal(target target) []byte {
nt.Range.MaxProto = nt.Range.MinProto
ret := make([]byte, 0, nfNATMarhsalledSize)
- return binary.Marshal(ret, usermem.ByteOrder, nt)
+ return binary.Marshal(ret, hostarch.ByteOrder, nt)
}
func (*nfNATTargetMaker) unmarshal(buf []byte, filter stack.IPHeaderFilter) (target, *syserr.Error) {
@@ -388,7 +388,7 @@ func (*nfNATTargetMaker) unmarshal(buf []byte, filter stack.IPHeaderFilter) (tar
var natRange linux.NFNATRange
buf = buf[linux.SizeOfXTEntryTarget:nfNATMarhsalledSize]
- binary.Unmarshal(buf, usermem.ByteOrder, &natRange)
+ binary.Unmarshal(buf, hostarch.ByteOrder, &natRange)
// We don't support port or address ranges.
if natRange.MinAddr != natRange.MaxAddr {
@@ -454,7 +454,7 @@ func parseTarget(filter stack.IPHeaderFilter, optVal []byte, ipv6 bool) (stack.T
}
var target linux.XTEntryTarget
buf := optVal[:linux.SizeOfXTEntryTarget]
- binary.Unmarshal(buf, usermem.ByteOrder, &target)
+ binary.Unmarshal(buf, hostarch.ByteOrder, &target)
return unmarshalTarget(target, filter, optVal)
}
@@ -487,11 +487,11 @@ func (jt *JumpTarget) Action(*stack.PacketBuffer, *stack.ConnTrack, stack.Hook,
func ntohs(port uint16) uint16 {
buf := make([]byte, 2)
binary.BigEndian.PutUint16(buf, port)
- return usermem.ByteOrder.Uint16(buf)
+ return hostarch.ByteOrder.Uint16(buf)
}
func htons(port uint16) uint16 {
buf := make([]byte, 2)
- usermem.ByteOrder.PutUint16(buf, port)
+ hostarch.ByteOrder.PutUint16(buf, port)
return binary.BigEndian.Uint16(buf)
}
diff --git a/pkg/sentry/socket/netfilter/tcp_matcher.go b/pkg/sentry/socket/netfilter/tcp_matcher.go
index 678d6b578..69557f515 100644
--- a/pkg/sentry/socket/netfilter/tcp_matcher.go
+++ b/pkg/sentry/socket/netfilter/tcp_matcher.go
@@ -19,9 +19,9 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/tcpip/stack"
- "gvisor.dev/gvisor/pkg/usermem"
)
const matcherNameTCP = "tcp"
@@ -48,7 +48,7 @@ func (tcpMarshaler) marshal(mr matcher) []byte {
DestinationPortEnd: matcher.destinationPortEnd,
}
buf := make([]byte, 0, linux.SizeOfXTTCP)
- return marshalEntryMatch(matcherNameTCP, binary.Marshal(buf, usermem.ByteOrder, xttcp))
+ return marshalEntryMatch(matcherNameTCP, binary.Marshal(buf, hostarch.ByteOrder, xttcp))
}
// unmarshal implements matchMaker.unmarshal.
@@ -60,7 +60,7 @@ func (tcpMarshaler) unmarshal(buf []byte, filter stack.IPHeaderFilter) (stack.Ma
// For alignment reasons, the match's total size may
// exceed what's strictly necessary to hold matchData.
var matchData linux.XTTCP
- binary.Unmarshal(buf[:linux.SizeOfXTTCP], usermem.ByteOrder, &matchData)
+ binary.Unmarshal(buf[:linux.SizeOfXTTCP], hostarch.ByteOrder, &matchData)
nflog("parseMatchers: parsed XTTCP: %+v", matchData)
if matchData.Option != 0 ||
diff --git a/pkg/sentry/socket/netfilter/udp_matcher.go b/pkg/sentry/socket/netfilter/udp_matcher.go
index f8568873f..6a60e6bd6 100644
--- a/pkg/sentry/socket/netfilter/udp_matcher.go
+++ b/pkg/sentry/socket/netfilter/udp_matcher.go
@@ -19,9 +19,9 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/tcpip/stack"
- "gvisor.dev/gvisor/pkg/usermem"
)
const matcherNameUDP = "udp"
@@ -48,7 +48,7 @@ func (udpMarshaler) marshal(mr matcher) []byte {
DestinationPortEnd: matcher.destinationPortEnd,
}
buf := make([]byte, 0, linux.SizeOfXTUDP)
- return marshalEntryMatch(matcherNameUDP, binary.Marshal(buf, usermem.ByteOrder, xtudp))
+ return marshalEntryMatch(matcherNameUDP, binary.Marshal(buf, hostarch.ByteOrder, xtudp))
}
// unmarshal implements matchMaker.unmarshal.
@@ -60,7 +60,7 @@ func (udpMarshaler) unmarshal(buf []byte, filter stack.IPHeaderFilter) (stack.Ma
// For alignment reasons, the match's total size may exceed what's
// strictly necessary to hold matchData.
var matchData linux.XTUDP
- binary.Unmarshal(buf[:linux.SizeOfXTUDP], usermem.ByteOrder, &matchData)
+ binary.Unmarshal(buf[:linux.SizeOfXTUDP], hostarch.ByteOrder, &matchData)
nflog("parseMatchers: parsed XTUDP: %+v", matchData)
if matchData.InverseFlags != 0 {
diff --git a/pkg/sentry/socket/netlink/BUILD b/pkg/sentry/socket/netlink/BUILD
index 9313e1167..171b95c63 100644
--- a/pkg/sentry/socket/netlink/BUILD
+++ b/pkg/sentry/socket/netlink/BUILD
@@ -16,6 +16,7 @@ go_library(
"//pkg/abi/linux",
"//pkg/binary",
"//pkg/context",
+ "//pkg/hostarch",
"//pkg/marshal",
"//pkg/marshal/primitive",
"//pkg/sentry/arch",
diff --git a/pkg/sentry/socket/netlink/message.go b/pkg/sentry/socket/netlink/message.go
index 0899c61d1..ab0e68af7 100644
--- a/pkg/sentry/socket/netlink/message.go
+++ b/pkg/sentry/socket/netlink/message.go
@@ -20,7 +20,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/binary"
- "gvisor.dev/gvisor/pkg/usermem"
+ "gvisor.dev/gvisor/pkg/hostarch"
)
// alignPad returns the length of padding required for alignment.
@@ -42,7 +42,7 @@ type Message struct {
func NewMessage(hdr linux.NetlinkMessageHeader) *Message {
return &Message{
hdr: hdr,
- buf: binary.Marshal(nil, usermem.ByteOrder, hdr),
+ buf: binary.Marshal(nil, hostarch.ByteOrder, hdr),
}
}
@@ -58,7 +58,7 @@ func ParseMessage(buf []byte) (msg *Message, rest []byte, ok bool) {
return
}
var hdr linux.NetlinkMessageHeader
- binary.Unmarshal(hdrBytes, usermem.ByteOrder, &hdr)
+ binary.Unmarshal(hdrBytes, hostarch.ByteOrder, &hdr)
// Msg portion.
totalMsgLen := int(hdr.Length)
@@ -105,7 +105,7 @@ func (m *Message) GetData(msg interface{}) (AttrsView, bool) {
if !ok {
return nil, false
}
- binary.Unmarshal(msgBytes, usermem.ByteOrder, msg)
+ binary.Unmarshal(msgBytes, hostarch.ByteOrder, msg)
numPad := alignPad(linux.NetlinkMessageHeaderSize+size, linux.NLMSG_ALIGNTO)
// Linux permits the last message not being aligned, just consume all of it.
@@ -126,7 +126,7 @@ func (m *Message) GetData(msg interface{}) (AttrsView, bool) {
// calling Finalize.
func (m *Message) Finalize() []byte {
// Update length, which is the first 4 bytes of the header.
- usermem.ByteOrder.PutUint32(m.buf, uint32(len(m.buf)))
+ hostarch.ByteOrder.PutUint32(m.buf, uint32(len(m.buf)))
// Align the message. Note that the message length in the header (set
// above) is the useful length of the message, not the total aligned
@@ -146,7 +146,7 @@ func (m *Message) putZeros(n int) {
// Put serializes v into the message.
func (m *Message) Put(v interface{}) {
- m.buf = binary.Marshal(m.buf, usermem.ByteOrder, v)
+ m.buf = binary.Marshal(m.buf, hostarch.ByteOrder, v)
}
// PutAttr adds v to the message as a netlink attribute.
@@ -251,7 +251,7 @@ func (v AttrsView) ParseFirst() (hdr linux.NetlinkAttrHeader, value []byte, rest
if !ok {
return
}
- binary.Unmarshal(hdrBytes, usermem.ByteOrder, &hdr)
+ binary.Unmarshal(hdrBytes, hostarch.ByteOrder, &hdr)
value, ok = b.Extract(int(hdr.Length) - linux.NetlinkAttrHeaderSize)
if !ok {
diff --git a/pkg/sentry/socket/netlink/socket.go b/pkg/sentry/socket/netlink/socket.go
index d5ffc75ce..30c297149 100644
--- a/pkg/sentry/socket/netlink/socket.go
+++ b/pkg/sentry/socket/netlink/socket.go
@@ -22,6 +22,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/binary"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/marshal"
"gvisor.dev/gvisor/pkg/marshal/primitive"
"gvisor.dev/gvisor/pkg/sentry/arch"
@@ -222,7 +223,7 @@ func ExtractSockAddr(b []byte) (*linux.SockAddrNetlink, *syserr.Error) {
}
var sa linux.SockAddrNetlink
- binary.Unmarshal(b[:linux.SockAddrNetlinkSize], usermem.ByteOrder, &sa)
+ binary.Unmarshal(b[:linux.SockAddrNetlinkSize], hostarch.ByteOrder, &sa)
if sa.Family != linux.AF_NETLINK {
return nil, syserr.ErrInvalidArgument
@@ -327,7 +328,7 @@ func (s *socketOpsCommon) Shutdown(t *kernel.Task, how int) *syserr.Error {
}
// GetSockOpt implements socket.Socket.GetSockOpt.
-func (s *socketOpsCommon) GetSockOpt(t *kernel.Task, level int, name int, outPtr usermem.Addr, outLen int) (marshal.Marshallable, *syserr.Error) {
+func (s *socketOpsCommon) GetSockOpt(t *kernel.Task, level int, name int, outPtr hostarch.Addr, outLen int) (marshal.Marshallable, *syserr.Error) {
switch level {
case linux.SOL_SOCKET:
switch name {
@@ -388,7 +389,7 @@ func (s *socketOpsCommon) SetSockOpt(t *kernel.Task, level int, name int, opt []
if len(opt) < sizeOfInt32 {
return syserr.ErrInvalidArgument
}
- size := usermem.ByteOrder.Uint32(opt)
+ size := hostarch.ByteOrder.Uint32(opt)
if size < minSendBufferSize {
size = minSendBufferSize
} else if size > maxSendBufferSize {
@@ -411,7 +412,7 @@ func (s *socketOpsCommon) SetSockOpt(t *kernel.Task, level int, name int, opt []
if len(opt) < sizeOfInt32 {
return syserr.ErrInvalidArgument
}
- passcred := usermem.ByteOrder.Uint32(opt)
+ passcred := hostarch.ByteOrder.Uint32(opt)
s.ep.SocketOptions().SetPassCred(passcred != 0)
return nil
diff --git a/pkg/sentry/socket/netstack/BUILD b/pkg/sentry/socket/netstack/BUILD
index 244d99436..0b39a5b67 100644
--- a/pkg/sentry/socket/netstack/BUILD
+++ b/pkg/sentry/socket/netstack/BUILD
@@ -21,6 +21,7 @@ go_library(
"//pkg/abi/linux",
"//pkg/binary",
"//pkg/context",
+ "//pkg/hostarch",
"//pkg/log",
"//pkg/marshal",
"//pkg/marshal/primitive",
diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go
index 64e70ab9d..ed6572bab 100644
--- a/pkg/sentry/socket/netstack/netstack.go
+++ b/pkg/sentry/socket/netstack/netstack.go
@@ -37,6 +37,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/binary"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/marshal"
"gvisor.dev/gvisor/pkg/marshal/primitive"
@@ -600,7 +601,7 @@ func (s *socketOpsCommon) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error {
return syserr.ErrInvalidArgument
}
- family := usermem.ByteOrder.Uint16(sockaddr)
+ family := hostarch.ByteOrder.Uint16(sockaddr)
var addr tcpip.FullAddress
// Bind for AF_PACKET requires only family, protocol and ifindex.
@@ -611,7 +612,7 @@ func (s *socketOpsCommon) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error {
if len(sockaddr) < sockAddrLinkSize {
return syserr.ErrInvalidArgument
}
- binary.Unmarshal(sockaddr[:sockAddrLinkSize], usermem.ByteOrder, &a)
+ binary.Unmarshal(sockaddr[:sockAddrLinkSize], hostarch.ByteOrder, &a)
if a.Protocol != uint16(s.protocol) {
return syserr.ErrInvalidArgument
@@ -757,7 +758,7 @@ func (s *socketOpsCommon) Shutdown(t *kernel.Task, how int) *syserr.Error {
// GetSockOpt implements the linux syscall getsockopt(2) for sockets backed by
// tcpip.Endpoint.
-func (s *SocketOperations) GetSockOpt(t *kernel.Task, level, name int, outPtr usermem.Addr, outLen int) (marshal.Marshallable, *syserr.Error) {
+func (s *SocketOperations) GetSockOpt(t *kernel.Task, level, name int, outPtr hostarch.Addr, outLen int) (marshal.Marshallable, *syserr.Error) {
// TODO(b/78348848): Unlike other socket options, SO_TIMESTAMP is
// implemented specifically for netstack.SocketOperations rather than
// commonEndpoint. commonEndpoint should be extended to support socket
@@ -793,7 +794,7 @@ func (s *SocketOperations) GetSockOpt(t *kernel.Task, level, name int, outPtr us
// GetSockOpt can be used to implement the linux syscall getsockopt(2) for
// sockets backed by a commonEndpoint.
-func GetSockOpt(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, family int, skType linux.SockType, level, name int, outPtr usermem.Addr, outLen int) (marshal.Marshallable, *syserr.Error) {
+func GetSockOpt(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, family int, skType linux.SockType, level, name int, outPtr hostarch.Addr, outLen int) (marshal.Marshallable, *syserr.Error) {
switch level {
case linux.SOL_SOCKET:
return getSockOptSocket(t, s, ep, family, skType, name, outLen)
@@ -1244,7 +1245,7 @@ func getSockOptTCP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name,
}
// getSockOptIPv6 implements GetSockOpt when level is SOL_IPV6.
-func getSockOptIPv6(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name int, outPtr usermem.Addr, outLen int) (marshal.Marshallable, *syserr.Error) {
+func getSockOptIPv6(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name int, outPtr hostarch.Addr, outLen int) (marshal.Marshallable, *syserr.Error) {
if _, ok := ep.(tcpip.Endpoint); !ok {
log.Warningf("SOL_IPV6 options not supported on endpoints other than tcpip.Endpoint: option = %d", name)
return nil, syserr.ErrUnknownProtocolOption
@@ -1392,7 +1393,7 @@ func getSockOptIPv6(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name
}
// getSockOptIP implements GetSockOpt when level is SOL_IP.
-func getSockOptIP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name int, outPtr usermem.Addr, outLen int, family int) (marshal.Marshallable, *syserr.Error) {
+func getSockOptIP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name int, outPtr hostarch.Addr, outLen int, family int) (marshal.Marshallable, *syserr.Error) {
if _, ok := ep.(tcpip.Endpoint); !ok {
log.Warningf("SOL_IP options not supported on endpoints other than tcpip.Endpoint: option = %d", name)
return nil, syserr.ErrUnknownProtocolOption
@@ -1602,7 +1603,7 @@ func (s *SocketOperations) SetSockOpt(t *kernel.Task, level int, name int, optVa
}
s.readMu.Lock()
defer s.readMu.Unlock()
- s.sockOptTimestamp = usermem.ByteOrder.Uint32(optVal) != 0
+ s.sockOptTimestamp = hostarch.ByteOrder.Uint32(optVal) != 0
return nil
}
if level == linux.SOL_TCP && name == linux.TCP_INQ {
@@ -1611,7 +1612,7 @@ func (s *SocketOperations) SetSockOpt(t *kernel.Task, level int, name int, optVa
}
s.readMu.Lock()
defer s.readMu.Unlock()
- s.sockOptInq = usermem.ByteOrder.Uint32(optVal) != 0
+ s.sockOptInq = hostarch.ByteOrder.Uint32(optVal) != 0
return nil
}
@@ -1659,7 +1660,7 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam
return syserr.ErrInvalidArgument
}
- v := usermem.ByteOrder.Uint32(optVal)
+ v := hostarch.ByteOrder.Uint32(optVal)
ep.SocketOptions().SetSendBufferSize(int64(v), true)
return nil
@@ -1668,7 +1669,7 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam
return syserr.ErrInvalidArgument
}
- v := usermem.ByteOrder.Uint32(optVal)
+ v := hostarch.ByteOrder.Uint32(optVal)
return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.ReceiveBufferSizeOption, int(v)))
case linux.SO_REUSEADDR:
@@ -1676,7 +1677,7 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam
return syserr.ErrInvalidArgument
}
- v := usermem.ByteOrder.Uint32(optVal)
+ v := hostarch.ByteOrder.Uint32(optVal)
ep.SocketOptions().SetReuseAddress(v != 0)
return nil
@@ -1685,7 +1686,7 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam
return syserr.ErrInvalidArgument
}
- v := usermem.ByteOrder.Uint32(optVal)
+ v := hostarch.ByteOrder.Uint32(optVal)
ep.SocketOptions().SetReusePort(v != 0)
return nil
@@ -1714,7 +1715,7 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam
return syserr.ErrInvalidArgument
}
- v := usermem.ByteOrder.Uint32(optVal)
+ v := hostarch.ByteOrder.Uint32(optVal)
ep.SocketOptions().SetBroadcast(v != 0)
return nil
@@ -1723,7 +1724,7 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam
return syserr.ErrInvalidArgument
}
- v := usermem.ByteOrder.Uint32(optVal)
+ v := hostarch.ByteOrder.Uint32(optVal)
ep.SocketOptions().SetPassCred(v != 0)
return nil
@@ -1732,7 +1733,7 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam
return syserr.ErrInvalidArgument
}
- v := usermem.ByteOrder.Uint32(optVal)
+ v := hostarch.ByteOrder.Uint32(optVal)
ep.SocketOptions().SetKeepAlive(v != 0)
return nil
@@ -1742,7 +1743,7 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam
}
var v linux.Timeval
- binary.Unmarshal(optVal[:linux.SizeOfTimeval], usermem.ByteOrder, &v)
+ binary.Unmarshal(optVal[:linux.SizeOfTimeval], hostarch.ByteOrder, &v)
if v.Usec < 0 || v.Usec >= int64(time.Second/time.Microsecond) {
return syserr.ErrDomain
}
@@ -1755,7 +1756,7 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam
}
var v linux.Timeval
- binary.Unmarshal(optVal[:linux.SizeOfTimeval], usermem.ByteOrder, &v)
+ binary.Unmarshal(optVal[:linux.SizeOfTimeval], hostarch.ByteOrder, &v)
if v.Usec < 0 || v.Usec >= int64(time.Second/time.Microsecond) {
return syserr.ErrDomain
}
@@ -1767,7 +1768,7 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam
return syserr.ErrInvalidArgument
}
- v := usermem.ByteOrder.Uint32(optVal)
+ v := hostarch.ByteOrder.Uint32(optVal)
if v == 0 {
socket.SetSockOptEmitUnimplementedEvent(t, name)
@@ -1781,7 +1782,7 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam
return syserr.ErrInvalidArgument
}
- v := usermem.ByteOrder.Uint32(optVal)
+ v := hostarch.ByteOrder.Uint32(optVal)
ep.SocketOptions().SetNoChecksum(v != 0)
return nil
@@ -1791,7 +1792,7 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam
}
var v linux.Linger
- binary.Unmarshal(optVal[:linux.SizeOfLinger], usermem.ByteOrder, &v)
+ binary.Unmarshal(optVal[:linux.SizeOfLinger], hostarch.ByteOrder, &v)
ep.SocketOptions().SetLinger(tcpip.LingerOption{
Enabled: v.OnOff != 0,
@@ -1824,7 +1825,7 @@ func setSockOptTCP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name i
return syserr.ErrInvalidArgument
}
- v := usermem.ByteOrder.Uint32(optVal)
+ v := hostarch.ByteOrder.Uint32(optVal)
ep.SocketOptions().SetDelayOption(v == 0)
return nil
@@ -1833,7 +1834,7 @@ func setSockOptTCP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name i
return syserr.ErrInvalidArgument
}
- v := usermem.ByteOrder.Uint32(optVal)
+ v := hostarch.ByteOrder.Uint32(optVal)
ep.SocketOptions().SetCorkOption(v != 0)
return nil
@@ -1842,7 +1843,7 @@ func setSockOptTCP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name i
return syserr.ErrInvalidArgument
}
- v := usermem.ByteOrder.Uint32(optVal)
+ v := hostarch.ByteOrder.Uint32(optVal)
ep.SocketOptions().SetQuickAck(v != 0)
return nil
@@ -1851,7 +1852,7 @@ func setSockOptTCP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name i
return syserr.ErrInvalidArgument
}
- v := usermem.ByteOrder.Uint32(optVal)
+ v := hostarch.ByteOrder.Uint32(optVal)
return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.MaxSegOption, int(v)))
case linux.TCP_KEEPIDLE:
@@ -1859,7 +1860,7 @@ func setSockOptTCP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name i
return syserr.ErrInvalidArgument
}
- v := usermem.ByteOrder.Uint32(optVal)
+ v := hostarch.ByteOrder.Uint32(optVal)
if v < 1 || v > linux.MAX_TCP_KEEPIDLE {
return syserr.ErrInvalidArgument
}
@@ -1871,7 +1872,7 @@ func setSockOptTCP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name i
return syserr.ErrInvalidArgument
}
- v := usermem.ByteOrder.Uint32(optVal)
+ v := hostarch.ByteOrder.Uint32(optVal)
if v < 1 || v > linux.MAX_TCP_KEEPINTVL {
return syserr.ErrInvalidArgument
}
@@ -1883,7 +1884,7 @@ func setSockOptTCP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name i
return syserr.ErrInvalidArgument
}
- v := usermem.ByteOrder.Uint32(optVal)
+ v := hostarch.ByteOrder.Uint32(optVal)
if v < 1 || v > linux.MAX_TCP_KEEPCNT {
return syserr.ErrInvalidArgument
}
@@ -1894,7 +1895,7 @@ func setSockOptTCP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name i
return syserr.ErrInvalidArgument
}
- v := int32(usermem.ByteOrder.Uint32(optVal))
+ v := int32(hostarch.ByteOrder.Uint32(optVal))
if v < 0 {
return syserr.ErrInvalidArgument
}
@@ -1913,7 +1914,7 @@ func setSockOptTCP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name i
return syserr.ErrInvalidArgument
}
- v := int32(usermem.ByteOrder.Uint32(optVal))
+ v := int32(hostarch.ByteOrder.Uint32(optVal))
opt := tcpip.TCPLingerTimeoutOption(time.Second * time.Duration(v))
return syserr.TranslateNetstackError(ep.SetSockOpt(&opt))
@@ -1921,7 +1922,7 @@ func setSockOptTCP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name i
if len(optVal) < sizeOfInt32 {
return syserr.ErrInvalidArgument
}
- v := int32(usermem.ByteOrder.Uint32(optVal))
+ v := int32(hostarch.ByteOrder.Uint32(optVal))
if v < 0 {
v = 0
}
@@ -1932,7 +1933,7 @@ func setSockOptTCP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name i
if len(optVal) < sizeOfInt32 {
return syserr.ErrInvalidArgument
}
- v := usermem.ByteOrder.Uint32(optVal)
+ v := hostarch.ByteOrder.Uint32(optVal)
return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.TCPSynCountOption, int(v)))
@@ -1940,7 +1941,7 @@ func setSockOptTCP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name i
if len(optVal) < sizeOfInt32 {
return syserr.ErrInvalidArgument
}
- v := usermem.ByteOrder.Uint32(optVal)
+ v := hostarch.ByteOrder.Uint32(optVal)
return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.TCPWindowClampOption, int(v)))
@@ -1978,7 +1979,7 @@ func setSockOptIPv6(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name
return syserr.ErrInvalidEndpointState
}
- v := usermem.ByteOrder.Uint32(optVal)
+ v := hostarch.ByteOrder.Uint32(optVal)
ep.SocketOptions().SetV6Only(v != 0)
return nil
@@ -2024,7 +2025,7 @@ func setSockOptIPv6(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name
if len(optVal) < sizeOfInt32 {
return syserr.ErrInvalidArgument
}
- v := int32(usermem.ByteOrder.Uint32(optVal))
+ v := int32(hostarch.ByteOrder.Uint32(optVal))
ep.SocketOptions().SetReceiveOriginalDstAddress(v != 0)
return nil
@@ -2033,7 +2034,7 @@ func setSockOptIPv6(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name
if len(optVal) < sizeOfInt32 {
return syserr.ErrInvalidArgument
}
- v := int32(usermem.ByteOrder.Uint32(optVal))
+ v := int32(hostarch.ByteOrder.Uint32(optVal))
if v < -1 || v > 255 {
return syserr.ErrInvalidArgument
}
@@ -2117,12 +2118,12 @@ func copyInMulticastRequest(optVal []byte, allowAddr bool) (linux.InetMulticastR
if len(optVal) >= inetMulticastRequestWithNICSize {
var req linux.InetMulticastRequestWithNIC
- binary.Unmarshal(optVal[:inetMulticastRequestWithNICSize], usermem.ByteOrder, &req)
+ binary.Unmarshal(optVal[:inetMulticastRequestWithNICSize], hostarch.ByteOrder, &req)
return req, nil
}
var req linux.InetMulticastRequestWithNIC
- binary.Unmarshal(optVal[:inetMulticastRequestSize], usermem.ByteOrder, &req.InetMulticastRequest)
+ binary.Unmarshal(optVal[:inetMulticastRequestSize], hostarch.ByteOrder, &req.InetMulticastRequest)
return req, nil
}
@@ -2132,7 +2133,7 @@ func copyInMulticastV6Request(optVal []byte) (linux.Inet6MulticastRequest, *syse
}
var req linux.Inet6MulticastRequest
- binary.Unmarshal(optVal[:inet6MulticastRequestSize], usermem.ByteOrder, &req)
+ binary.Unmarshal(optVal[:inet6MulticastRequestSize], hostarch.ByteOrder, &req)
return req, nil
}
@@ -2145,7 +2146,7 @@ func parseIntOrChar(buf []byte) (int32, *syserr.Error) {
}
if len(buf) >= sizeOfInt32 {
- return int32(usermem.ByteOrder.Uint32(buf)), nil
+ return int32(hostarch.ByteOrder.Uint32(buf)), nil
}
return int32(buf[0]), nil
@@ -3007,7 +3008,7 @@ func interfaceIoctl(ctx context.Context, io usermem.IO, arg int, ifr *linux.IFRe
if arg == linux.SIOCGIFNAME {
// Gets the name of the interface given the interface index
// stored in ifr_ifindex.
- index = int32(usermem.ByteOrder.Uint32(ifr.Data[:4]))
+ index = int32(hostarch.ByteOrder.Uint32(ifr.Data[:4]))
if iface, ok := stack.Interfaces()[index]; ok {
ifr.SetName(iface.Name)
return nil
@@ -3029,7 +3030,7 @@ func interfaceIoctl(ctx context.Context, io usermem.IO, arg int, ifr *linux.IFRe
switch arg {
case linux.SIOCGIFINDEX:
// Copy out the index to the data.
- usermem.ByteOrder.PutUint32(ifr.Data[:], uint32(index))
+ hostarch.ByteOrder.PutUint32(ifr.Data[:], uint32(index))
case linux.SIOCGIFHWADDR:
// Copy the hardware address out.
@@ -3042,7 +3043,7 @@ func interfaceIoctl(ctx context.Context, io usermem.IO, arg int, ifr *linux.IFRe
// sockaddr. sa_family contains the ARPHRD_* device type,
// sa_data the L2 hardware address starting from byte 0. Setting
// the hardware address is a privileged operation.
- usermem.ByteOrder.PutUint16(ifr.Data[:], iface.DeviceType)
+ hostarch.ByteOrder.PutUint16(ifr.Data[:], iface.DeviceType)
n := copy(ifr.Data[2:], iface.Addr)
for i := 2 + n; i < len(ifr.Data); i++ {
ifr.Data[i] = 0 // Clear padding.
@@ -3055,7 +3056,7 @@ func interfaceIoctl(ctx context.Context, io usermem.IO, arg int, ifr *linux.IFRe
}
// Drop the flags that don't fit in the size that we need to return. This
// matches Linux behavior.
- usermem.ByteOrder.PutUint16(ifr.Data[:2], uint16(f))
+ hostarch.ByteOrder.PutUint16(ifr.Data[:2], uint16(f))
case linux.SIOCGIFADDR:
// Copy the IPv4 address out.
@@ -3071,11 +3072,11 @@ func interfaceIoctl(ctx context.Context, io usermem.IO, arg int, ifr *linux.IFRe
case linux.SIOCGIFMETRIC:
// Gets the metric of the device. As per netdevice(7), this
// always just sets ifr_metric to 0.
- usermem.ByteOrder.PutUint32(ifr.Data[:4], 0)
+ hostarch.ByteOrder.PutUint32(ifr.Data[:4], 0)
case linux.SIOCGIFMTU:
// Gets the MTU of the device.
- usermem.ByteOrder.PutUint32(ifr.Data[:4], iface.MTU)
+ hostarch.ByteOrder.PutUint32(ifr.Data[:4], iface.MTU)
case linux.SIOCGIFMAP:
// Gets the hardware parameters of the device.
@@ -3101,8 +3102,8 @@ func interfaceIoctl(ctx context.Context, io usermem.IO, arg int, ifr *linux.IFRe
continue
}
// Populate ifr.ifr_netmask (type sockaddr).
- usermem.ByteOrder.PutUint16(ifr.Data[0:2], uint16(linux.AF_INET))
- usermem.ByteOrder.PutUint16(ifr.Data[2:4], 0)
+ hostarch.ByteOrder.PutUint16(ifr.Data[0:2], uint16(linux.AF_INET))
+ hostarch.ByteOrder.PutUint16(ifr.Data[2:4], 0)
var mask uint32 = 0xffffffff << (32 - addr.PrefixLen)
// Netmask is expected to be returned as a big endian
// value.
@@ -3157,14 +3158,14 @@ func ifconfIoctl(ctx context.Context, t *kernel.Task, io usermem.IO, ifc *linux.
// Populate ifr.ifr_addr.
ifr := linux.IFReq{}
ifr.SetName(iface.Name)
- usermem.ByteOrder.PutUint16(ifr.Data[0:2], uint16(ifaceAddr.Family))
- usermem.ByteOrder.PutUint16(ifr.Data[2:4], 0)
+ hostarch.ByteOrder.PutUint16(ifr.Data[0:2], uint16(ifaceAddr.Family))
+ hostarch.ByteOrder.PutUint16(ifr.Data[2:4], 0)
copy(ifr.Data[4:8], ifaceAddr.Addr[:4])
// Copy the ifr to userspace.
dst := uintptr(ifc.Ptr) + uintptr(ifc.Len)
ifc.Len += int32(linux.SizeOfIFReq)
- if _, err := ifr.CopyOut(t, usermem.Addr(dst)); err != nil {
+ if _, err := ifr.CopyOut(t, hostarch.Addr(dst)); err != nil {
return err
}
}
diff --git a/pkg/sentry/socket/netstack/netstack_vfs2.go b/pkg/sentry/socket/netstack/netstack_vfs2.go
index fc29f8f13..30f3ad153 100644
--- a/pkg/sentry/socket/netstack/netstack_vfs2.go
+++ b/pkg/sentry/socket/netstack/netstack_vfs2.go
@@ -17,6 +17,7 @@ package netstack
import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/marshal"
"gvisor.dev/gvisor/pkg/marshal/primitive"
"gvisor.dev/gvisor/pkg/sentry/arch"
@@ -197,7 +198,7 @@ func (s *SocketVFS2) Ioctl(ctx context.Context, uio usermem.IO, args arch.Syscal
// GetSockOpt implements the linux syscall getsockopt(2) for sockets backed by
// tcpip.Endpoint.
-func (s *SocketVFS2) GetSockOpt(t *kernel.Task, level, name int, outPtr usermem.Addr, outLen int) (marshal.Marshallable, *syserr.Error) {
+func (s *SocketVFS2) GetSockOpt(t *kernel.Task, level, name int, outPtr hostarch.Addr, outLen int) (marshal.Marshallable, *syserr.Error) {
// TODO(b/78348848): Unlike other socket options, SO_TIMESTAMP is
// implemented specifically for netstack.SocketVFS2 rather than
// commonEndpoint. commonEndpoint should be extended to support socket
@@ -245,7 +246,7 @@ func (s *SocketVFS2) SetSockOpt(t *kernel.Task, level int, name int, optVal []by
}
s.readMu.Lock()
defer s.readMu.Unlock()
- s.sockOptTimestamp = usermem.ByteOrder.Uint32(optVal) != 0
+ s.sockOptTimestamp = hostarch.ByteOrder.Uint32(optVal) != 0
return nil
}
if level == linux.SOL_TCP && name == linux.TCP_INQ {
@@ -254,7 +255,7 @@ func (s *SocketVFS2) SetSockOpt(t *kernel.Task, level int, name int, optVal []by
}
s.readMu.Lock()
defer s.readMu.Unlock()
- s.sockOptInq = usermem.ByteOrder.Uint32(optVal) != 0
+ s.sockOptInq = hostarch.ByteOrder.Uint32(optVal) != 0
return nil
}
diff --git a/pkg/sentry/socket/socket.go b/pkg/sentry/socket/socket.go
index 909341dcf..4c3d48096 100644
--- a/pkg/sentry/socket/socket.go
+++ b/pkg/sentry/socket/socket.go
@@ -26,6 +26,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/binary"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/marshal"
"gvisor.dev/gvisor/pkg/sentry/device"
"gvisor.dev/gvisor/pkg/sentry/fs"
@@ -216,7 +217,7 @@ type SocketOps interface {
Shutdown(t *kernel.Task, how int) *syserr.Error
// GetSockOpt implements the getsockopt(2) linux unix.
- GetSockOpt(t *kernel.Task, level int, name int, outPtr usermem.Addr, outLen int) (marshal.Marshallable, *syserr.Error)
+ GetSockOpt(t *kernel.Task, level int, name int, outPtr hostarch.Addr, outLen int) (marshal.Marshallable, *syserr.Error)
// SetSockOpt implements the setsockopt(2) linux unix.
SetSockOpt(t *kernel.Task, level int, name int, opt []byte) *syserr.Error
@@ -356,7 +357,7 @@ func NewDirent(ctx context.Context, d *device.Device) *fs.Dirent {
Type: fs.Socket,
DeviceID: d.DeviceID(),
InodeID: ino,
- BlockSize: usermem.PageSize,
+ BlockSize: hostarch.PageSize,
})
// Dirent name matches net/socket.c:sockfs_dname.
@@ -571,19 +572,19 @@ func UnmarshalSockAddr(family int, data []byte) linux.SockAddr {
switch family {
case unix.AF_INET:
var addr linux.SockAddrInet
- binary.Unmarshal(data[:unix.SizeofSockaddrInet4], usermem.ByteOrder, &addr)
+ binary.Unmarshal(data[:unix.SizeofSockaddrInet4], hostarch.ByteOrder, &addr)
return &addr
case unix.AF_INET6:
var addr linux.SockAddrInet6
- binary.Unmarshal(data[:unix.SizeofSockaddrInet6], usermem.ByteOrder, &addr)
+ binary.Unmarshal(data[:unix.SizeofSockaddrInet6], hostarch.ByteOrder, &addr)
return &addr
case unix.AF_UNIX:
var addr linux.SockAddrUnix
- binary.Unmarshal(data[:unix.SizeofSockaddrUnix], usermem.ByteOrder, &addr)
+ binary.Unmarshal(data[:unix.SizeofSockaddrUnix], hostarch.ByteOrder, &addr)
return &addr
case unix.AF_NETLINK:
var addr linux.SockAddrNetlink
- binary.Unmarshal(data[:unix.SizeofSockaddrNetlink], usermem.ByteOrder, &addr)
+ binary.Unmarshal(data[:unix.SizeofSockaddrNetlink], hostarch.ByteOrder, &addr)
return &addr
default:
panic(fmt.Sprintf("Unsupported socket family %v", family))
@@ -693,7 +694,7 @@ func AddressAndFamily(addr []byte) (tcpip.FullAddress, uint16, *syserr.Error) {
}
// Get the rest of the fields based on the address family.
- switch family := usermem.ByteOrder.Uint16(addr); family {
+ switch family := hostarch.ByteOrder.Uint16(addr); family {
case linux.AF_UNIX:
path := addr[2:]
if len(path) > linux.UnixPathMax {
@@ -715,7 +716,7 @@ func AddressAndFamily(addr []byte) (tcpip.FullAddress, uint16, *syserr.Error) {
if len(addr) < sockAddrInetSize {
return tcpip.FullAddress{}, family, syserr.ErrInvalidArgument
}
- binary.Unmarshal(addr[:sockAddrInetSize], usermem.ByteOrder, &a)
+ binary.Unmarshal(addr[:sockAddrInetSize], hostarch.ByteOrder, &a)
out := tcpip.FullAddress{
Addr: BytesToIPAddress(a.Addr[:]),
@@ -728,7 +729,7 @@ func AddressAndFamily(addr []byte) (tcpip.FullAddress, uint16, *syserr.Error) {
if len(addr) < sockAddrInet6Size {
return tcpip.FullAddress{}, family, syserr.ErrInvalidArgument
}
- binary.Unmarshal(addr[:sockAddrInet6Size], usermem.ByteOrder, &a)
+ binary.Unmarshal(addr[:sockAddrInet6Size], hostarch.ByteOrder, &a)
out := tcpip.FullAddress{
Addr: BytesToIPAddress(a.Addr[:]),
@@ -744,7 +745,7 @@ func AddressAndFamily(addr []byte) (tcpip.FullAddress, uint16, *syserr.Error) {
if len(addr) < sockAddrLinkSize {
return tcpip.FullAddress{}, family, syserr.ErrInvalidArgument
}
- binary.Unmarshal(addr[:sockAddrLinkSize], usermem.ByteOrder, &a)
+ binary.Unmarshal(addr[:sockAddrLinkSize], hostarch.ByteOrder, &a)
if a.Family != linux.AF_PACKET || a.HardwareAddrLen != header.EthernetAddressSize {
return tcpip.FullAddress{}, family, syserr.ErrInvalidArgument
}
diff --git a/pkg/sentry/socket/unix/BUILD b/pkg/sentry/socket/unix/BUILD
index ff53a26b7..c9cbefb3a 100644
--- a/pkg/sentry/socket/unix/BUILD
+++ b/pkg/sentry/socket/unix/BUILD
@@ -40,6 +40,7 @@ go_library(
"//pkg/abi/linux",
"//pkg/context",
"//pkg/fspath",
+ "//pkg/hostarch",
"//pkg/log",
"//pkg/marshal",
"//pkg/refs",
diff --git a/pkg/sentry/socket/unix/unix.go b/pkg/sentry/socket/unix/unix.go
index b22f7973a..db7b1affe 100644
--- a/pkg/sentry/socket/unix/unix.go
+++ b/pkg/sentry/socket/unix/unix.go
@@ -24,6 +24,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fspath"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/marshal"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/fs"
@@ -192,7 +193,7 @@ func (s *SocketOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO,
// GetSockOpt implements the linux syscall getsockopt(2) for sockets backed by
// a transport.Endpoint.
-func (s *SocketOperations) GetSockOpt(t *kernel.Task, level, name int, outPtr usermem.Addr, outLen int) (marshal.Marshallable, *syserr.Error) {
+func (s *SocketOperations) GetSockOpt(t *kernel.Task, level, name int, outPtr hostarch.Addr, outLen int) (marshal.Marshallable, *syserr.Error) {
return netstack.GetSockOpt(t, s, s.ep, linux.AF_UNIX, s.ep.Type(), level, name, outPtr, outLen)
}
diff --git a/pkg/sentry/socket/unix/unix_vfs2.go b/pkg/sentry/socket/unix/unix_vfs2.go
index 7890d1048..c39e317ff 100644
--- a/pkg/sentry/socket/unix/unix_vfs2.go
+++ b/pkg/sentry/socket/unix/unix_vfs2.go
@@ -18,6 +18,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fspath"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/marshal"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/sockfs"
@@ -112,7 +113,7 @@ func (s *SocketVFS2) Release(ctx context.Context) {
// GetSockOpt implements the linux syscall getsockopt(2) for sockets backed by
// a transport.Endpoint.
-func (s *SocketVFS2) GetSockOpt(t *kernel.Task, level, name int, outPtr usermem.Addr, outLen int) (marshal.Marshallable, *syserr.Error) {
+func (s *SocketVFS2) GetSockOpt(t *kernel.Task, level, name int, outPtr hostarch.Addr, outLen int) (marshal.Marshallable, *syserr.Error) {
return netstack.GetSockOpt(t, s, s.ep, linux.AF_UNIX, s.ep.Type(), level, name, outPtr, outLen)
}
diff --git a/pkg/sentry/strace/BUILD b/pkg/sentry/strace/BUILD
index 1b7fd2232..2ebd77f82 100644
--- a/pkg/sentry/strace/BUILD
+++ b/pkg/sentry/strace/BUILD
@@ -28,6 +28,7 @@ go_library(
"//pkg/binary",
"//pkg/bits",
"//pkg/eventchannel",
+ "//pkg/hostarch",
"//pkg/marshal/primitive",
"//pkg/seccomp",
"//pkg/sentry/arch",
@@ -35,7 +36,6 @@ go_library(
"//pkg/sentry/socket",
"//pkg/sentry/socket/netlink",
"//pkg/sentry/syscalls/linux",
- "//pkg/usermem",
"@org_golang_x_sys//unix:go_default_library",
],
)
diff --git a/pkg/sentry/strace/epoll.go b/pkg/sentry/strace/epoll.go
index ae3b998c8..48650e3f9 100644
--- a/pkg/sentry/strace/epoll.go
+++ b/pkg/sentry/strace/epoll.go
@@ -21,10 +21,11 @@ import (
"gvisor.dev/gvisor/pkg/abi"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/sentry/kernel"
- "gvisor.dev/gvisor/pkg/usermem"
+
+ "gvisor.dev/gvisor/pkg/hostarch"
)
-func epollEvent(t *kernel.Task, eventAddr usermem.Addr) string {
+func epollEvent(t *kernel.Task, eventAddr hostarch.Addr) string {
var e linux.EpollEvent
if _, err := e.CopyIn(t, eventAddr); err != nil {
return fmt.Sprintf("%#x {error reading event: %v}", eventAddr, err)
@@ -35,7 +36,7 @@ func epollEvent(t *kernel.Task, eventAddr usermem.Addr) string {
return sb.String()
}
-func epollEvents(t *kernel.Task, eventsAddr usermem.Addr, numEvents, maxBytes uint64) string {
+func epollEvents(t *kernel.Task, eventsAddr hostarch.Addr, numEvents, maxBytes uint64) string {
var sb strings.Builder
fmt.Fprintf(&sb, "%#x {", eventsAddr)
addr := eventsAddr
diff --git a/pkg/sentry/strace/poll.go b/pkg/sentry/strace/poll.go
index 074e80f9b..572a8b50b 100644
--- a/pkg/sentry/strace/poll.go
+++ b/pkg/sentry/strace/poll.go
@@ -22,7 +22,8 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/sentry/kernel"
slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux"
- "gvisor.dev/gvisor/pkg/usermem"
+
+ "gvisor.dev/gvisor/pkg/hostarch"
)
// PollEventSet is the set of poll(2) event flags.
@@ -52,7 +53,7 @@ func pollFD(t *kernel.Task, pfd *linux.PollFD, post bool) string {
return fmt.Sprintf("{FD: %s, Events: %s, REvents: %s}", fd(t, pfd.FD), PollEventSet.Parse(uint64(pfd.Events)), revents)
}
-func pollFDs(t *kernel.Task, addr usermem.Addr, nfds uint, post bool) string {
+func pollFDs(t *kernel.Task, addr hostarch.Addr, nfds uint, post bool) string {
if addr == 0 {
return "null"
}
diff --git a/pkg/sentry/strace/select.go b/pkg/sentry/strace/select.go
index 3a4c32aa0..e6e928157 100644
--- a/pkg/sentry/strace/select.go
+++ b/pkg/sentry/strace/select.go
@@ -19,7 +19,8 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/syscalls/linux"
- "gvisor.dev/gvisor/pkg/usermem"
+
+ "gvisor.dev/gvisor/pkg/hostarch"
)
func fdsFromSet(t *kernel.Task, set []byte) []int {
@@ -35,7 +36,7 @@ func fdsFromSet(t *kernel.Task, set []byte) []int {
return fds
}
-func fdSet(t *kernel.Task, nfds int, addr usermem.Addr) string {
+func fdSet(t *kernel.Task, nfds int, addr hostarch.Addr) string {
if nfds < 0 {
return fmt.Sprintf("%#x (negative nfds)", addr)
}
diff --git a/pkg/sentry/strace/signal.go b/pkg/sentry/strace/signal.go
index c41f36e3f..e5b379a20 100644
--- a/pkg/sentry/strace/signal.go
+++ b/pkg/sentry/strace/signal.go
@@ -21,7 +21,8 @@ import (
"gvisor.dev/gvisor/pkg/abi"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/sentry/kernel"
- "gvisor.dev/gvisor/pkg/usermem"
+
+ "gvisor.dev/gvisor/pkg/hostarch"
)
// signalNames contains the names of all named signals.
@@ -100,7 +101,7 @@ var sigActionFlags = abi.FlagSet{
},
}
-func sigSet(t *kernel.Task, addr usermem.Addr) string {
+func sigSet(t *kernel.Task, addr hostarch.Addr) string {
if addr == 0 {
return "null"
}
@@ -110,7 +111,7 @@ func sigSet(t *kernel.Task, addr usermem.Addr) string {
return fmt.Sprintf("%#x (error copying sigset: %v)", addr, err)
}
- set := linux.SignalSet(usermem.ByteOrder.Uint64(b[:]))
+ set := linux.SignalSet(hostarch.ByteOrder.Uint64(b[:]))
return fmt.Sprintf("%#x %s", addr, formatSigSet(set))
}
@@ -124,7 +125,7 @@ func formatSigSet(set linux.SignalSet) string {
return fmt.Sprintf("[%v]", strings.Join(signals, " "))
}
-func sigAction(t *kernel.Task, addr usermem.Addr) string {
+func sigAction(t *kernel.Task, addr hostarch.Addr) string {
if addr == 0 {
return "null"
}
diff --git a/pkg/sentry/strace/socket.go b/pkg/sentry/strace/socket.go
index d943a7cb1..e5b7f9b96 100644
--- a/pkg/sentry/strace/socket.go
+++ b/pkg/sentry/strace/socket.go
@@ -26,7 +26,8 @@ import (
"gvisor.dev/gvisor/pkg/sentry/socket"
"gvisor.dev/gvisor/pkg/sentry/socket/netlink"
slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux"
- "gvisor.dev/gvisor/pkg/usermem"
+
+ "gvisor.dev/gvisor/pkg/hostarch"
)
// SocketFamily are the possible socket(2) families.
@@ -161,7 +162,7 @@ var controlMessageType = map[int32]string{
linux.SO_TIMESTAMP: "SO_TIMESTAMP",
}
-func cmsghdr(t *kernel.Task, addr usermem.Addr, length uint64, maxBytes uint64) string {
+func cmsghdr(t *kernel.Task, addr hostarch.Addr, length uint64, maxBytes uint64) string {
if length > maxBytes {
return fmt.Sprintf("%#x (error decoding control: invalid length (%d))", addr, length)
}
@@ -180,7 +181,7 @@ func cmsghdr(t *kernel.Task, addr usermem.Addr, length uint64, maxBytes uint64)
}
var h linux.ControlMessageHeader
- binary.Unmarshal(buf[i:i+linux.SizeOfControlMessageHeader], usermem.ByteOrder, &h)
+ binary.Unmarshal(buf[i:i+linux.SizeOfControlMessageHeader], hostarch.ByteOrder, &h)
var skipData bool
level := "SOL_SOCKET"
@@ -230,7 +231,7 @@ func cmsghdr(t *kernel.Task, addr usermem.Addr, length uint64, maxBytes uint64)
numRights := rightsSize / linux.SizeOfControlMessageRight
fds := make(linux.ControlMessageRights, numRights)
- binary.Unmarshal(buf[i:i+rightsSize], usermem.ByteOrder, &fds)
+ binary.Unmarshal(buf[i:i+rightsSize], hostarch.ByteOrder, &fds)
rights := make([]string, 0, len(fds))
for _, fd := range fds {
@@ -257,7 +258,7 @@ func cmsghdr(t *kernel.Task, addr usermem.Addr, length uint64, maxBytes uint64)
}
var creds linux.ControlMessageCredentials
- binary.Unmarshal(buf[i:i+linux.SizeOfControlMessageCredentials], usermem.ByteOrder, &creds)
+ binary.Unmarshal(buf[i:i+linux.SizeOfControlMessageCredentials], hostarch.ByteOrder, &creds)
strs = append(strs, fmt.Sprintf(
"{level=%s, type=%s, length=%d, pid: %d, uid: %d, gid: %d}",
@@ -281,7 +282,7 @@ func cmsghdr(t *kernel.Task, addr usermem.Addr, length uint64, maxBytes uint64)
}
var tv linux.Timeval
- binary.Unmarshal(buf[i:i+linux.SizeOfTimeval], usermem.ByteOrder, &tv)
+ binary.Unmarshal(buf[i:i+linux.SizeOfTimeval], hostarch.ByteOrder, &tv)
strs = append(strs, fmt.Sprintf(
"{level=%s, type=%s, length=%d, Sec: %d, Usec: %d}",
@@ -301,7 +302,7 @@ func cmsghdr(t *kernel.Task, addr usermem.Addr, length uint64, maxBytes uint64)
return fmt.Sprintf("%#x %s", addr, strings.Join(strs, ", "))
}
-func msghdr(t *kernel.Task, addr usermem.Addr, printContent bool, maxBytes uint64) string {
+func msghdr(t *kernel.Task, addr hostarch.Addr, printContent bool, maxBytes uint64) string {
var msg slinux.MessageHeader64
if _, err := msg.CopyIn(t, addr); err != nil {
return fmt.Sprintf("%#x (error decoding msghdr: %v)", addr, err)
@@ -311,17 +312,17 @@ func msghdr(t *kernel.Task, addr usermem.Addr, printContent bool, maxBytes uint6
addr,
msg.Name,
msg.NameLen,
- iovecs(t, usermem.Addr(msg.Iov), int(msg.IovLen), printContent, maxBytes),
+ iovecs(t, hostarch.Addr(msg.Iov), int(msg.IovLen), printContent, maxBytes),
)
if printContent {
- s = fmt.Sprintf("%s, control={%s}", s, cmsghdr(t, usermem.Addr(msg.Control), msg.ControlLen, maxBytes))
+ s = fmt.Sprintf("%s, control={%s}", s, cmsghdr(t, hostarch.Addr(msg.Control), msg.ControlLen, maxBytes))
} else {
s = fmt.Sprintf("%s, control=%#x, control_len=%d", s, msg.Control, msg.ControlLen)
}
return fmt.Sprintf("%s, flags=%d}", s, msg.Flags)
}
-func sockAddr(t *kernel.Task, addr usermem.Addr, length uint32) string {
+func sockAddr(t *kernel.Task, addr hostarch.Addr, length uint32) string {
if addr == 0 {
return "null"
}
@@ -335,7 +336,7 @@ func sockAddr(t *kernel.Task, addr usermem.Addr, length uint32) string {
if len(b) < 2 {
return fmt.Sprintf("%#x {address too short: %d bytes}", addr, len(b))
}
- family := usermem.ByteOrder.Uint16(b)
+ family := hostarch.ByteOrder.Uint16(b)
familyStr := SocketFamily.Parse(uint64(family))
@@ -362,7 +363,7 @@ func sockAddr(t *kernel.Task, addr usermem.Addr, length uint32) string {
}
}
-func postSockAddr(t *kernel.Task, addr usermem.Addr, lengthPtr usermem.Addr) string {
+func postSockAddr(t *kernel.Task, addr hostarch.Addr, lengthPtr hostarch.Addr) string {
if addr == 0 {
return "null"
}
@@ -379,14 +380,14 @@ func postSockAddr(t *kernel.Task, addr usermem.Addr, lengthPtr usermem.Addr) str
return sockAddr(t, addr, l)
}
-func copySockLen(t *kernel.Task, addr usermem.Addr) (uint32, error) {
+func copySockLen(t *kernel.Task, addr hostarch.Addr) (uint32, error) {
// socklen_t is 32-bits.
var l primitive.Uint32
_, err := l.CopyIn(t, addr)
return uint32(l), err
}
-func sockLenPointer(t *kernel.Task, addr usermem.Addr) string {
+func sockLenPointer(t *kernel.Task, addr hostarch.Addr) string {
if addr == 0 {
return "null"
}
@@ -420,7 +421,7 @@ func sockFlags(flags int32) string {
return SocketFlagSet.Parse(uint64(flags))
}
-func getSockOptVal(t *kernel.Task, level, optname uint64, optVal usermem.Addr, optLen usermem.Addr, maximumBlobSize uint, rval uintptr) string {
+func getSockOptVal(t *kernel.Task, level, optname uint64, optVal hostarch.Addr, optLen hostarch.Addr, maximumBlobSize uint, rval uintptr) string {
if int(rval) < 0 {
return hexNum(uint64(optVal))
}
@@ -434,7 +435,7 @@ func getSockOptVal(t *kernel.Task, level, optname uint64, optVal usermem.Addr, o
return sockOptVal(t, level, optname, optVal, uint64(l), maximumBlobSize)
}
-func sockOptVal(t *kernel.Task, level, optname uint64, optVal usermem.Addr, optLen uint64, maximumBlobSize uint) string {
+func sockOptVal(t *kernel.Task, level, optname uint64, optVal hostarch.Addr, optLen uint64, maximumBlobSize uint) string {
switch optLen {
case 1:
var v primitive.Uint8
diff --git a/pkg/sentry/strace/strace.go b/pkg/sentry/strace/strace.go
index 396744597..ec5d5f846 100644
--- a/pkg/sentry/strace/strace.go
+++ b/pkg/sentry/strace/strace.go
@@ -32,7 +32,8 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel"
pb "gvisor.dev/gvisor/pkg/sentry/strace/strace_go_proto"
slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux"
- "gvisor.dev/gvisor/pkg/usermem"
+
+ "gvisor.dev/gvisor/pkg/hostarch"
)
// DefaultLogMaximumSize is the default LogMaximumSize.
@@ -62,7 +63,7 @@ func hexArg(arg arch.SyscallArgument) string {
return hexNum(arg.Uint64())
}
-func iovecs(t *kernel.Task, addr usermem.Addr, iovcnt int, printContent bool, maxBytes uint64) string {
+func iovecs(t *kernel.Task, addr hostarch.Addr, iovcnt int, printContent bool, maxBytes uint64) string {
if iovcnt < 0 || iovcnt > linux.UIO_MAXIOV {
return fmt.Sprintf("%#x (error decoding iovecs: invalid iovcnt)", addr)
}
@@ -107,7 +108,7 @@ func iovecs(t *kernel.Task, addr usermem.Addr, iovcnt int, printContent bool, ma
return fmt.Sprintf("%#x %s", addr, strings.Join(iovs, ", "))
}
-func dump(t *kernel.Task, addr usermem.Addr, size uint, maximumBlobSize uint) string {
+func dump(t *kernel.Task, addr hostarch.Addr, size uint, maximumBlobSize uint) string {
origSize := size
if size > maximumBlobSize {
size = maximumBlobSize
@@ -131,7 +132,7 @@ func dump(t *kernel.Task, addr usermem.Addr, size uint, maximumBlobSize uint) st
return fmt.Sprintf("%#x %q%s", addr, b[:amt], dot)
}
-func path(t *kernel.Task, addr usermem.Addr) string {
+func path(t *kernel.Task, addr hostarch.Addr) string {
path, err := t.CopyInString(addr, linux.PATH_MAX)
if err != nil {
return fmt.Sprintf("%#x (error decoding path: %s)", addr, err)
@@ -196,7 +197,7 @@ func fdVFS2(t *kernel.Task, fd int32) string {
return fmt.Sprintf("%#x %s", fd, name)
}
-func fdpair(t *kernel.Task, addr usermem.Addr) string {
+func fdpair(t *kernel.Task, addr hostarch.Addr) string {
var fds [2]int32
_, err := primitive.CopyInt32SliceIn(t, addr, fds[:])
if err != nil {
@@ -206,7 +207,7 @@ func fdpair(t *kernel.Task, addr usermem.Addr) string {
return fmt.Sprintf("%#x [%d %d]", addr, fds[0], fds[1])
}
-func uname(t *kernel.Task, addr usermem.Addr) string {
+func uname(t *kernel.Task, addr hostarch.Addr) string {
var u linux.UtsName
if _, err := u.CopyIn(t, addr); err != nil {
return fmt.Sprintf("%#x (error decoding utsname: %s)", addr, err)
@@ -215,7 +216,7 @@ func uname(t *kernel.Task, addr usermem.Addr) string {
return fmt.Sprintf("%#x %s", addr, u)
}
-func utimensTimespec(t *kernel.Task, addr usermem.Addr) string {
+func utimensTimespec(t *kernel.Task, addr hostarch.Addr) string {
if addr == 0 {
return "null"
}
@@ -237,7 +238,7 @@ func utimensTimespec(t *kernel.Task, addr usermem.Addr) string {
return fmt.Sprintf("%#x {sec=%v nsec=%s}", addr, tim.Sec, ns)
}
-func timespec(t *kernel.Task, addr usermem.Addr) string {
+func timespec(t *kernel.Task, addr hostarch.Addr) string {
if addr == 0 {
return "null"
}
@@ -249,7 +250,7 @@ func timespec(t *kernel.Task, addr usermem.Addr) string {
return fmt.Sprintf("%#x {sec=%v nsec=%v}", addr, tim.Sec, tim.Nsec)
}
-func timeval(t *kernel.Task, addr usermem.Addr) string {
+func timeval(t *kernel.Task, addr hostarch.Addr) string {
if addr == 0 {
return "null"
}
@@ -262,7 +263,7 @@ func timeval(t *kernel.Task, addr usermem.Addr) string {
return fmt.Sprintf("%#x {sec=%v usec=%v}", addr, tim.Sec, tim.Usec)
}
-func utimbuf(t *kernel.Task, addr usermem.Addr) string {
+func utimbuf(t *kernel.Task, addr hostarch.Addr) string {
if addr == 0 {
return "null"
}
@@ -275,7 +276,7 @@ func utimbuf(t *kernel.Task, addr usermem.Addr) string {
return fmt.Sprintf("%#x {actime=%v, modtime=%v}", addr, utim.Actime, utim.Modtime)
}
-func stat(t *kernel.Task, addr usermem.Addr) string {
+func stat(t *kernel.Task, addr hostarch.Addr) string {
if addr == 0 {
return "null"
}
@@ -287,27 +288,27 @@ func stat(t *kernel.Task, addr usermem.Addr) string {
return fmt.Sprintf("%#x {dev=%d, ino=%d, mode=%s, nlink=%d, uid=%d, gid=%d, rdev=%d, size=%d, blksize=%d, blocks=%d, atime=%s, mtime=%s, ctime=%s}", addr, stat.Dev, stat.Ino, linux.FileMode(stat.Mode), stat.Nlink, stat.UID, stat.GID, stat.Rdev, stat.Size, stat.Blksize, stat.Blocks, time.Unix(stat.ATime.Sec, stat.ATime.Nsec), time.Unix(stat.MTime.Sec, stat.MTime.Nsec), time.Unix(stat.CTime.Sec, stat.CTime.Nsec))
}
-func itimerval(t *kernel.Task, addr usermem.Addr) string {
+func itimerval(t *kernel.Task, addr hostarch.Addr) string {
if addr == 0 {
return "null"
}
interval := timeval(t, addr)
- value := timeval(t, addr+usermem.Addr((*linux.Timeval)(nil).SizeBytes()))
+ value := timeval(t, addr+hostarch.Addr((*linux.Timeval)(nil).SizeBytes()))
return fmt.Sprintf("%#x {interval=%s, value=%s}", addr, interval, value)
}
-func itimerspec(t *kernel.Task, addr usermem.Addr) string {
+func itimerspec(t *kernel.Task, addr hostarch.Addr) string {
if addr == 0 {
return "null"
}
interval := timespec(t, addr)
- value := timespec(t, addr+usermem.Addr((*linux.Timespec)(nil).SizeBytes()))
+ value := timespec(t, addr+hostarch.Addr((*linux.Timespec)(nil).SizeBytes()))
return fmt.Sprintf("%#x {interval=%s, value=%s}", addr, interval, value)
}
-func stringVector(t *kernel.Task, addr usermem.Addr) string {
+func stringVector(t *kernel.Task, addr hostarch.Addr) string {
vec, err := t.CopyInVector(addr, slinux.ExecMaxElemSize, slinux.ExecMaxTotalSize)
if err != nil {
return fmt.Sprintf("%#x {error copying vector: %v}", addr, err)
@@ -323,7 +324,7 @@ func stringVector(t *kernel.Task, addr usermem.Addr) string {
return s
}
-func rusage(t *kernel.Task, addr usermem.Addr) string {
+func rusage(t *kernel.Task, addr hostarch.Addr) string {
if addr == 0 {
return "null"
}
@@ -335,7 +336,7 @@ func rusage(t *kernel.Task, addr usermem.Addr) string {
return fmt.Sprintf("%#x %+v", addr, ru)
}
-func capHeader(t *kernel.Task, addr usermem.Addr) string {
+func capHeader(t *kernel.Task, addr hostarch.Addr) string {
if addr == 0 {
return "null"
}
@@ -360,7 +361,7 @@ func capHeader(t *kernel.Task, addr usermem.Addr) string {
return fmt.Sprintf("%#x {Version: %s, Pid: %d}", addr, version, hdr.Pid)
}
-func capData(t *kernel.Task, hdrAddr, dataAddr usermem.Addr) string {
+func capData(t *kernel.Task, hdrAddr, dataAddr hostarch.Addr) string {
if dataAddr == 0 {
return "null"
}
diff --git a/pkg/sentry/syscalls/linux/BUILD b/pkg/sentry/syscalls/linux/BUILD
index 3dcf36a96..408a6c422 100644
--- a/pkg/sentry/syscalls/linux/BUILD
+++ b/pkg/sentry/syscalls/linux/BUILD
@@ -64,6 +64,7 @@ go_library(
"//pkg/abi/linux",
"//pkg/bpf",
"//pkg/context",
+ "//pkg/hostarch",
"//pkg/log",
"//pkg/marshal",
"//pkg/marshal/primitive",
diff --git a/pkg/sentry/syscalls/linux/linux64.go b/pkg/sentry/syscalls/linux/linux64.go
index ac53a0c0e..2d2212605 100644
--- a/pkg/sentry/syscalls/linux/linux64.go
+++ b/pkg/sentry/syscalls/linux/linux64.go
@@ -18,11 +18,11 @@ package linux
import (
"gvisor.dev/gvisor/pkg/abi"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/syscalls"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
const (
@@ -405,7 +405,7 @@ var AMD64 = &kernel.SyscallTable{
434: syscalls.ErrorWithEvent("pidfd_open", syserror.ENOSYS, "", nil),
435: syscalls.ErrorWithEvent("clone3", syserror.ENOSYS, "", nil),
},
- Emulate: map[usermem.Addr]uintptr{
+ Emulate: map[hostarch.Addr]uintptr{
0xffffffffff600000: 96, // vsyscall gettimeofday(2)
0xffffffffff600400: 201, // vsyscall time(2)
0xffffffffff600800: 309, // vsyscall getcpu(2)
@@ -723,7 +723,7 @@ var ARM64 = &kernel.SyscallTable{
434: syscalls.ErrorWithEvent("pidfd_open", syserror.ENOSYS, "", nil),
435: syscalls.ErrorWithEvent("clone3", syserror.ENOSYS, "", nil),
},
- Emulate: map[usermem.Addr]uintptr{},
+ Emulate: map[hostarch.Addr]uintptr{},
Missing: func(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, error) {
t.Kernel().EmitUnimplementedEvent(t)
return 0, syserror.ENOSYS
diff --git a/pkg/sentry/syscalls/linux/sigset.go b/pkg/sentry/syscalls/linux/sigset.go
index 434559b80..e8c2d8f9e 100644
--- a/pkg/sentry/syscalls/linux/sigset.go
+++ b/pkg/sentry/syscalls/linux/sigset.go
@@ -16,9 +16,9 @@ package linux
import (
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
// CopyInSigSet copies in a sigset_t, checks its size, and ensures that KILL and
@@ -27,7 +27,7 @@ import (
// TODO(gvisor.dev/issue/1624): This is only exported because
// syscalls/vfs2/signal.go depends on it. Once vfs1 is deleted and the vfs2
// syscalls are moved into this package, then they can be unexported.
-func CopyInSigSet(t *kernel.Task, sigSetAddr usermem.Addr, size uint) (linux.SignalSet, error) {
+func CopyInSigSet(t *kernel.Task, sigSetAddr hostarch.Addr, size uint) (linux.SignalSet, error) {
if size != linux.SignalSetSize {
return 0, syserror.EINVAL
}
@@ -35,14 +35,14 @@ func CopyInSigSet(t *kernel.Task, sigSetAddr usermem.Addr, size uint) (linux.Sig
if _, err := t.CopyInBytes(sigSetAddr, b); err != nil {
return 0, err
}
- mask := usermem.ByteOrder.Uint64(b[:])
+ mask := hostarch.ByteOrder.Uint64(b[:])
return linux.SignalSet(mask) &^ kernel.UnblockableSignals, nil
}
// copyOutSigSet copies out a sigset_t.
-func copyOutSigSet(t *kernel.Task, sigSetAddr usermem.Addr, mask linux.SignalSet) error {
+func copyOutSigSet(t *kernel.Task, sigSetAddr hostarch.Addr, mask linux.SignalSet) error {
b := t.CopyScratchBuffer(8)
- usermem.ByteOrder.PutUint64(b, uint64(mask))
+ hostarch.ByteOrder.PutUint64(b, uint64(mask))
_, err := t.CopyOutBytes(sigSetAddr, b)
return err
}
@@ -55,15 +55,15 @@ func copyOutSigSet(t *kernel.Task, sigSetAddr usermem.Addr, mask linux.SignalSet
// };
//
// and returns sigset_addr and size.
-func copyInSigSetWithSize(t *kernel.Task, addr usermem.Addr) (usermem.Addr, uint, error) {
+func copyInSigSetWithSize(t *kernel.Task, addr hostarch.Addr) (hostarch.Addr, uint, error) {
switch t.Arch().Width() {
case 8:
in := t.CopyScratchBuffer(16)
if _, err := t.CopyInBytes(addr, in); err != nil {
return 0, 0, err
}
- maskAddr := usermem.Addr(usermem.ByteOrder.Uint64(in[0:]))
- maskSize := uint(usermem.ByteOrder.Uint64(in[8:]))
+ maskAddr := hostarch.Addr(hostarch.ByteOrder.Uint64(in[0:]))
+ maskSize := uint(hostarch.ByteOrder.Uint64(in[8:]))
return maskAddr, maskSize, nil
default:
return 0, 0, syserror.ENOSYS
diff --git a/pkg/sentry/syscalls/linux/sys_aio.go b/pkg/sentry/syscalls/linux/sys_aio.go
index c2285f796..70e8569a8 100644
--- a/pkg/sentry/syscalls/linux/sys_aio.go
+++ b/pkg/sentry/syscalls/linux/sys_aio.go
@@ -17,6 +17,7 @@ package linux
import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/marshal/primitive"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/fs"
@@ -152,7 +153,7 @@ func IoGetevents(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S
}
// Keep rolling.
- eventsAddr += usermem.Addr(linux.IOEventSize)
+ eventsAddr += hostarch.Addr(linux.IOEventSize)
}
// Everything finished.
@@ -191,12 +192,12 @@ func memoryFor(t *kernel.Task, cb *linux.IOCallback) (usermem.IOSequence, error)
// I/O.
switch cb.OpCode {
case linux.IOCB_CMD_PREAD, linux.IOCB_CMD_PWRITE:
- return t.SingleIOSequence(usermem.Addr(cb.Buf), bytes, usermem.IOOpts{
+ return t.SingleIOSequence(hostarch.Addr(cb.Buf), bytes, usermem.IOOpts{
AddressSpaceActive: false,
})
case linux.IOCB_CMD_PREADV, linux.IOCB_CMD_PWRITEV:
- return t.IovecsIOSequence(usermem.Addr(cb.Buf), bytes, usermem.IOOpts{
+ return t.IovecsIOSequence(hostarch.Addr(cb.Buf), bytes, usermem.IOOpts{
AddressSpaceActive: false,
})
@@ -219,7 +220,7 @@ func IoCancel(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
// LINT.IfChange
-func getAIOCallback(t *kernel.Task, file *fs.File, cbAddr usermem.Addr, cb *linux.IOCallback, ioseq usermem.IOSequence, actx *mm.AIOContext, eventFile *fs.File) kernel.AIOCallback {
+func getAIOCallback(t *kernel.Task, file *fs.File, cbAddr hostarch.Addr, cb *linux.IOCallback, ioseq usermem.IOSequence, actx *mm.AIOContext, eventFile *fs.File) kernel.AIOCallback {
return func(ctx context.Context) {
if actx.Dead() {
actx.CancelPendingRequest()
@@ -264,7 +265,7 @@ func getAIOCallback(t *kernel.Task, file *fs.File, cbAddr usermem.Addr, cb *linu
}
// submitCallback processes a single callback.
-func submitCallback(t *kernel.Task, id uint64, cb *linux.IOCallback, cbAddr usermem.Addr) error {
+func submitCallback(t *kernel.Task, id uint64, cb *linux.IOCallback, cbAddr hostarch.Addr) error {
file := t.GetFile(cb.FD)
if file == nil {
// File not found.
@@ -339,7 +340,7 @@ func IoSubmit(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
for i := int32(0); i < nrEvents; i++ {
// Copy in the callback address.
- var cbAddr usermem.Addr
+ var cbAddr hostarch.Addr
switch t.Arch().Width() {
case 8:
var cbAddrP primitive.Uint64
@@ -351,7 +352,7 @@ func IoSubmit(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
// Nothing done.
return 0, nil, err
}
- cbAddr = usermem.Addr(cbAddrP)
+ cbAddr = hostarch.Addr(cbAddrP)
default:
return 0, nil, syserror.ENOSYS
}
@@ -379,7 +380,7 @@ func IoSubmit(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
}
// Advance to the next one.
- addr += usermem.Addr(t.Arch().Width())
+ addr += hostarch.Addr(t.Arch().Width())
}
return uintptr(nrEvents), nil, nil
diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go
index fd9649340..9cd238efd 100644
--- a/pkg/sentry/syscalls/linux/sys_file.go
+++ b/pkg/sentry/syscalls/linux/sys_file.go
@@ -18,6 +18,7 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/marshal/primitive"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/fs"
@@ -29,7 +30,6 @@ import (
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/sentry/limits"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
// fileOpAt performs an operation on the second last component in the path.
@@ -115,7 +115,7 @@ func fileOpOn(t *kernel.Task, dirFD int32, path string, resolve bool, fn func(ro
}
// copyInPath copies a path in.
-func copyInPath(t *kernel.Task, addr usermem.Addr, allowEmpty bool) (path string, dirPath bool, err error) {
+func copyInPath(t *kernel.Task, addr hostarch.Addr, allowEmpty bool) (path string, dirPath bool, err error) {
path, err = t.CopyInString(addr, linux.PATH_MAX)
if err != nil {
return "", false, err
@@ -133,7 +133,7 @@ func copyInPath(t *kernel.Task, addr usermem.Addr, allowEmpty bool) (path string
// LINT.IfChange
-func openAt(t *kernel.Task, dirFD int32, addr usermem.Addr, flags uint) (fd uintptr, err error) {
+func openAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, flags uint) (fd uintptr, err error) {
path, dirPath, err := copyInPath(t, addr, false /* allowEmpty */)
if err != nil {
return 0, err
@@ -208,7 +208,7 @@ func openAt(t *kernel.Task, dirFD int32, addr usermem.Addr, flags uint) (fd uint
return fd, err // Use result in frame.
}
-func mknodAt(t *kernel.Task, dirFD int32, addr usermem.Addr, mode linux.FileMode) error {
+func mknodAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, mode linux.FileMode) error {
path, dirPath, err := copyInPath(t, addr, false /* allowEmpty */)
if err != nil {
return err
@@ -301,7 +301,7 @@ func Mknodat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
return 0, nil, mknodAt(t, dirFD, path, mode)
}
-func createAt(t *kernel.Task, dirFD int32, addr usermem.Addr, flags uint, mode linux.FileMode) (fd uintptr, err error) {
+func createAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, flags uint, mode linux.FileMode) (fd uintptr, err error) {
path, dirPath, err := copyInPath(t, addr, false /* allowEmpty */)
if err != nil {
return 0, err
@@ -515,7 +515,7 @@ func (ac accessContext) Value(key interface{}) interface{} {
}
}
-func accessAt(t *kernel.Task, dirFD int32, addr usermem.Addr, mode uint) error {
+func accessAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, mode uint) error {
const rOK = 4
const wOK = 2
const xOK = 1
@@ -694,7 +694,7 @@ func Getcwd(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
}
// Top it off with a terminator.
- _, err = t.CopyOutBytes(addr+usermem.Addr(bytes), []byte("\x00"))
+ _, err = t.CopyOutBytes(addr+hostarch.Addr(bytes), []byte("\x00"))
return uintptr(bytes + 1), nil, err
}
@@ -1164,7 +1164,7 @@ func Fadvise64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
// LINT.IfChange
-func mkdirAt(t *kernel.Task, dirFD int32, addr usermem.Addr, mode linux.FileMode) error {
+func mkdirAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, mode linux.FileMode) error {
path, _, err := copyInPath(t, addr, false /* allowEmpty */)
if err != nil {
return err
@@ -1216,7 +1216,7 @@ func Mkdirat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
return 0, nil, mkdirAt(t, dirFD, addr, mode)
}
-func rmdirAt(t *kernel.Task, dirFD int32, addr usermem.Addr) error {
+func rmdirAt(t *kernel.Task, dirFD int32, addr hostarch.Addr) error {
path, _, err := copyInPath(t, addr, false /* allowEmpty */)
if err != nil {
return err
@@ -1256,7 +1256,7 @@ func Rmdir(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
return 0, nil, rmdirAt(t, linux.AT_FDCWD, addr)
}
-func symlinkAt(t *kernel.Task, dirFD int32, newAddr usermem.Addr, oldAddr usermem.Addr) error {
+func symlinkAt(t *kernel.Task, dirFD int32, newAddr hostarch.Addr, oldAddr hostarch.Addr) error {
newPath, dirPath, err := copyInPath(t, newAddr, false /* allowEmpty */)
if err != nil {
return err
@@ -1341,7 +1341,7 @@ func mayLinkAt(t *kernel.Task, target *fs.Inode) error {
// linkAt creates a hard link to the target specified by oldDirFD and oldAddr,
// specified by newDirFD and newAddr. If resolve is true, then the symlinks
// will be followed when evaluating the target.
-func linkAt(t *kernel.Task, oldDirFD int32, oldAddr usermem.Addr, newDirFD int32, newAddr usermem.Addr, resolve, allowEmpty bool) error {
+func linkAt(t *kernel.Task, oldDirFD int32, oldAddr hostarch.Addr, newDirFD int32, newAddr hostarch.Addr, resolve, allowEmpty bool) error {
oldPath, _, err := copyInPath(t, oldAddr, allowEmpty)
if err != nil {
return err
@@ -1448,7 +1448,7 @@ func Linkat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
// LINT.IfChange
-func readlinkAt(t *kernel.Task, dirFD int32, addr usermem.Addr, bufAddr usermem.Addr, size uint) (copied uintptr, err error) {
+func readlinkAt(t *kernel.Task, dirFD int32, addr hostarch.Addr, bufAddr hostarch.Addr, size uint) (copied uintptr, err error) {
path, dirPath, err := copyInPath(t, addr, false /* allowEmpty */)
if err != nil {
return 0, err
@@ -1511,7 +1511,7 @@ func Readlinkat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy
// LINT.IfChange
-func unlinkAt(t *kernel.Task, dirFD int32, addr usermem.Addr) error {
+func unlinkAt(t *kernel.Task, dirFD int32, addr hostarch.Addr) error {
path, dirPath, err := copyInPath(t, addr, false /* allowEmpty */)
if err != nil {
return err
@@ -1728,7 +1728,7 @@ func chown(t *kernel.Task, d *fs.Dirent, uid auth.UID, gid auth.GID) error {
return nil
}
-func chownAt(t *kernel.Task, fd int32, addr usermem.Addr, resolve, allowEmpty bool, uid auth.UID, gid auth.GID) error {
+func chownAt(t *kernel.Task, fd int32, addr hostarch.Addr, resolve, allowEmpty bool, uid auth.UID, gid auth.GID) error {
path, _, err := copyInPath(t, addr, allowEmpty)
if err != nil {
return err
@@ -1815,7 +1815,7 @@ func chmod(t *kernel.Task, d *fs.Dirent, mode linux.FileMode) error {
return nil
}
-func chmodAt(t *kernel.Task, fd int32, addr usermem.Addr, mode linux.FileMode) error {
+func chmodAt(t *kernel.Task, fd int32, addr hostarch.Addr, mode linux.FileMode) error {
path, _, err := copyInPath(t, addr, false /* allowEmpty */)
if err != nil {
return err
@@ -1866,7 +1866,7 @@ func defaultSetToSystemTimeSpec() fs.TimeSpec {
}
}
-func utimes(t *kernel.Task, dirFD int32, addr usermem.Addr, ts fs.TimeSpec, resolve bool) error {
+func utimes(t *kernel.Task, dirFD int32, addr hostarch.Addr, ts fs.TimeSpec, resolve bool) error {
setTimestamp := func(root *fs.Dirent, d *fs.Dirent, _ uint) error {
// Does the task own the file?
if !d.Inode.CheckOwnership(t) {
@@ -2030,7 +2030,7 @@ func Futimesat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
// LINT.IfChange
-func renameAt(t *kernel.Task, oldDirFD int32, oldAddr usermem.Addr, newDirFD int32, newAddr usermem.Addr) error {
+func renameAt(t *kernel.Task, oldDirFD int32, oldAddr hostarch.Addr, newDirFD int32, newAddr hostarch.Addr) error {
newPath, _, err := copyInPath(t, newAddr, false /* allowEmpty */)
if err != nil {
return err
diff --git a/pkg/sentry/syscalls/linux/sys_futex.go b/pkg/sentry/syscalls/linux/sys_futex.go
index f39ce0639..eeea1613b 100644
--- a/pkg/sentry/syscalls/linux/sys_futex.go
+++ b/pkg/sentry/syscalls/linux/sys_futex.go
@@ -18,11 +18,11 @@ import (
"time"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
// futexWaitRestartBlock encapsulates the state required to restart futex(2)
@@ -41,7 +41,7 @@ type futexWaitRestartBlock struct {
// Restart implements kernel.SyscallRestartBlock.Restart.
func (f *futexWaitRestartBlock) Restart(t *kernel.Task) (uintptr, error) {
- return futexWaitDuration(t, f.duration, false, usermem.Addr(f.addr), f.private, f.val, f.mask)
+ return futexWaitDuration(t, f.duration, false, hostarch.Addr(f.addr), f.private, f.val, f.mask)
}
// futexWaitAbsolute performs a FUTEX_WAIT_BITSET, blocking until the wait is
@@ -51,7 +51,7 @@ func (f *futexWaitRestartBlock) Restart(t *kernel.Task) (uintptr, error) {
//
// If blocking is interrupted, the syscall is restarted with the original
// arguments.
-func futexWaitAbsolute(t *kernel.Task, clockRealtime bool, ts linux.Timespec, forever bool, addr usermem.Addr, private bool, val, mask uint32) (uintptr, error) {
+func futexWaitAbsolute(t *kernel.Task, clockRealtime bool, ts linux.Timespec, forever bool, addr hostarch.Addr, private bool, val, mask uint32) (uintptr, error) {
w := t.FutexWaiter()
err := t.Futex().WaitPrepare(w, t, addr, private, val, mask)
if err != nil {
@@ -87,7 +87,7 @@ func futexWaitAbsolute(t *kernel.Task, clockRealtime bool, ts linux.Timespec, fo
// syscall. If forever is true, the syscall is restarted with the original
// arguments. If forever is false, duration is a relative timeout and the
// syscall is restarted with the remaining timeout.
-func futexWaitDuration(t *kernel.Task, duration time.Duration, forever bool, addr usermem.Addr, private bool, val, mask uint32) (uintptr, error) {
+func futexWaitDuration(t *kernel.Task, duration time.Duration, forever bool, addr hostarch.Addr, private bool, val, mask uint32) (uintptr, error) {
w := t.FutexWaiter()
err := t.Futex().WaitPrepare(w, t, addr, private, val, mask)
if err != nil {
@@ -124,7 +124,7 @@ func futexWaitDuration(t *kernel.Task, duration time.Duration, forever bool, add
return 0, syserror.ERESTART_RESTARTBLOCK
}
-func futexLockPI(t *kernel.Task, ts linux.Timespec, forever bool, addr usermem.Addr, private bool) error {
+func futexLockPI(t *kernel.Task, ts linux.Timespec, forever bool, addr hostarch.Addr, private bool) error {
w := t.FutexWaiter()
locked, err := t.Futex().LockPI(w, t, addr, uint32(t.ThreadID()), private, false)
if err != nil {
@@ -152,7 +152,7 @@ func futexLockPI(t *kernel.Task, ts linux.Timespec, forever bool, addr usermem.A
return syserror.ConvertIntr(err, syserror.ERESTARTSYS)
}
-func tryLockPI(t *kernel.Task, addr usermem.Addr, private bool) error {
+func tryLockPI(t *kernel.Task, addr hostarch.Addr, private bool) error {
w := t.FutexWaiter()
locked, err := t.Futex().LockPI(w, t, addr, uint32(t.ThreadID()), private, true)
if err != nil {
diff --git a/pkg/sentry/syscalls/linux/sys_getdents.go b/pkg/sentry/syscalls/linux/sys_getdents.go
index b25f7d881..bbba71d8f 100644
--- a/pkg/sentry/syscalls/linux/sys_getdents.go
+++ b/pkg/sentry/syscalls/linux/sys_getdents.go
@@ -19,6 +19,7 @@ import (
"io"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
@@ -62,7 +63,7 @@ func Getdents64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy
// getdents implements the core of getdents(2)/getdents64(2).
// f is the syscall implementation dirent serialization function.
-func getdents(t *kernel.Task, fd int32, addr usermem.Addr, size int, f func(*dirent, io.Writer) (int, error)) (uintptr, error) {
+func getdents(t *kernel.Task, fd int32, addr hostarch.Addr, size int, f func(*dirent, io.Writer) (int, error)) (uintptr, error) {
dir := t.GetFile(fd)
if dir == nil {
return 0, syserror.EBADF
diff --git a/pkg/sentry/syscalls/linux/sys_mempolicy.go b/pkg/sentry/syscalls/linux/sys_mempolicy.go
index 9b4a5c3f1..6d27f4292 100644
--- a/pkg/sentry/syscalls/linux/sys_mempolicy.go
+++ b/pkg/sentry/syscalls/linux/sys_mempolicy.go
@@ -18,6 +18,7 @@ import (
"fmt"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/syserror"
@@ -31,7 +32,7 @@ const (
allowedNodemask = (1 << maxNodes) - 1
)
-func copyInNodemask(t *kernel.Task, addr usermem.Addr, maxnode uint32) (uint64, error) {
+func copyInNodemask(t *kernel.Task, addr hostarch.Addr, maxnode uint32) (uint64, error) {
// "nodemask points to a bit mask of node IDs that contains up to maxnode
// bits. The bit mask size is rounded to the next multiple of
// sizeof(unsigned long), but the kernel will use bits only up to maxnode.
@@ -41,7 +42,7 @@ func copyInNodemask(t *kernel.Task, addr usermem.Addr, maxnode uint32) (uint64,
// because of what appears to be a bug: mm/mempolicy.c:get_nodes() uses
// maxnode-1, not maxnode, as the number of bits.
bits := maxnode - 1
- if bits > usermem.PageSize*8 { // also handles overflow from maxnode == 0
+ if bits > hostarch.PageSize*8 { // also handles overflow from maxnode == 0
return 0, syserror.EINVAL
}
if bits == 0 {
@@ -53,7 +54,7 @@ func copyInNodemask(t *kernel.Task, addr usermem.Addr, maxnode uint32) (uint64,
if _, err := t.CopyInBytes(addr, buf); err != nil {
return 0, err
}
- val := usermem.ByteOrder.Uint64(buf)
+ val := hostarch.ByteOrder.Uint64(buf)
// Check that only allowed bits in the first unsigned long in the nodemask
// are set.
if val&^allowedNodemask != 0 {
@@ -68,11 +69,11 @@ func copyInNodemask(t *kernel.Task, addr usermem.Addr, maxnode uint32) (uint64,
return val, nil
}
-func copyOutNodemask(t *kernel.Task, addr usermem.Addr, maxnode uint32, val uint64) error {
+func copyOutNodemask(t *kernel.Task, addr hostarch.Addr, maxnode uint32, val uint64) error {
// mm/mempolicy.c:copy_nodes_to_user() also uses maxnode-1 as the number of
// bits.
bits := maxnode - 1
- if bits > usermem.PageSize*8 { // also handles overflow from maxnode == 0
+ if bits > hostarch.PageSize*8 { // also handles overflow from maxnode == 0
return syserror.EINVAL
}
if bits == 0 {
@@ -80,7 +81,7 @@ func copyOutNodemask(t *kernel.Task, addr usermem.Addr, maxnode uint32, val uint
}
// Copy out the first unsigned long in the nodemask.
buf := t.CopyScratchBuffer(8)
- usermem.ByteOrder.PutUint64(buf, val)
+ hostarch.ByteOrder.PutUint64(buf, val)
if _, err := t.CopyOutBytes(addr, buf); err != nil {
return err
}
@@ -258,7 +259,7 @@ func Mbind(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
return 0, nil, err
}
-func copyInMempolicyNodemask(t *kernel.Task, modeWithFlags linux.NumaPolicy, nodemask usermem.Addr, maxnode uint32) (linux.NumaPolicy, uint64, error) {
+func copyInMempolicyNodemask(t *kernel.Task, modeWithFlags linux.NumaPolicy, nodemask hostarch.Addr, maxnode uint32) (linux.NumaPolicy, uint64, error) {
flags := linux.NumaPolicy(modeWithFlags & linux.MPOL_MODE_FLAGS)
mode := linux.NumaPolicy(modeWithFlags &^ linux.MPOL_MODE_FLAGS)
if flags == linux.MPOL_MODE_FLAGS {
diff --git a/pkg/sentry/syscalls/linux/sys_mmap.go b/pkg/sentry/syscalls/linux/sys_mmap.go
index cd8dfdfa4..70da0707d 100644
--- a/pkg/sentry/syscalls/linux/sys_mmap.go
+++ b/pkg/sentry/syscalls/linux/sys_mmap.go
@@ -23,7 +23,8 @@ import (
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/mm"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
+
+ "gvisor.dev/gvisor/pkg/hostarch"
)
// Brk implements linux syscall brk(2).
@@ -61,12 +62,12 @@ func Mmap(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC
Unmap: fixed,
Map32Bit: map32bit,
Private: private,
- Perms: usermem.AccessType{
+ Perms: hostarch.AccessType{
Read: linux.PROT_READ&prot != 0,
Write: linux.PROT_WRITE&prot != 0,
Execute: linux.PROT_EXEC&prot != 0,
},
- MaxPerms: usermem.AnyAccess,
+ MaxPerms: hostarch.AnyAccess,
GrowsDown: linux.MAP_GROWSDOWN&flags != 0,
Precommit: linux.MAP_POPULATE&flags != 0,
}
@@ -160,7 +161,7 @@ func Mremap(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
func Mprotect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
length := args[1].Uint64()
prot := args[2].Int()
- err := t.MemoryManager().MProtect(args[0].Pointer(), length, usermem.AccessType{
+ err := t.MemoryManager().MProtect(args[0].Pointer(), length, hostarch.AccessType{
Read: linux.PROT_READ&prot != 0,
Write: linux.PROT_WRITE&prot != 0,
Execute: linux.PROT_EXEC&prot != 0,
@@ -183,7 +184,7 @@ func Madvise(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
return 0, nil, nil
}
// Not explicitly stated: length need not be page-aligned.
- lenAddr, ok := usermem.Addr(length).RoundUp()
+ lenAddr, ok := hostarch.Addr(length).RoundUp()
if !ok {
return 0, nil, syserror.EINVAL
}
@@ -232,7 +233,7 @@ func Mincore(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
// "The length argument need not be a multiple of the page size, but since
// residency information is returned for whole pages, length is effectively
// rounded up to the next multiple of the page size." - mincore(2)
- la, ok := usermem.Addr(length).RoundUp()
+ la, ok := hostarch.Addr(length).RoundUp()
if !ok {
return 0, nil, syserror.ENOMEM
}
@@ -247,7 +248,7 @@ func Mincore(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
if mapped != uint64(la) {
return 0, nil, syserror.ENOMEM
}
- resident := bytes.Repeat([]byte{1}, int(mapped/usermem.PageSize))
+ resident := bytes.Repeat([]byte{1}, int(mapped/hostarch.PageSize))
_, err := t.CopyOutBytes(vec, resident)
return 0, nil, err
}
diff --git a/pkg/sentry/syscalls/linux/sys_mount.go b/pkg/sentry/syscalls/linux/sys_mount.go
index bd0633564..864d2138c 100644
--- a/pkg/sentry/syscalls/linux/sys_mount.go
+++ b/pkg/sentry/syscalls/linux/sys_mount.go
@@ -20,7 +20,8 @@ import (
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
+
+ "gvisor.dev/gvisor/pkg/hostarch"
)
// Mount implements Linux syscall mount(2).
@@ -31,7 +32,7 @@ func Mount(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
flags := args[3].Uint64()
dataAddr := args[4].Pointer()
- fsType, err := t.CopyInString(typeAddr, usermem.PageSize)
+ fsType, err := t.CopyInString(typeAddr, hostarch.PageSize)
if err != nil {
return 0, nil, err
}
@@ -52,7 +53,7 @@ func Mount(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
// character placement, and the address is passed to each file system.
// Most file systems always treat this data as a string, though, and so
// do all of the ones we implement.
- data, err = t.CopyInString(dataAddr, usermem.PageSize)
+ data, err = t.CopyInString(dataAddr, hostarch.PageSize)
if err != nil {
return 0, nil, err
}
diff --git a/pkg/sentry/syscalls/linux/sys_pipe.go b/pkg/sentry/syscalls/linux/sys_pipe.go
index f7135ea46..d95034347 100644
--- a/pkg/sentry/syscalls/linux/sys_pipe.go
+++ b/pkg/sentry/syscalls/linux/sys_pipe.go
@@ -16,19 +16,19 @@ package linux
import (
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/marshal/primitive"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/pipe"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
// LINT.IfChange
// pipe2 implements the actual system call with flags.
-func pipe2(t *kernel.Task, addr usermem.Addr, flags uint) (uintptr, error) {
+func pipe2(t *kernel.Task, addr hostarch.Addr, flags uint) (uintptr, error) {
if flags&^(linux.O_NONBLOCK|linux.O_CLOEXEC) != 0 {
return 0, syserror.EINVAL
}
diff --git a/pkg/sentry/syscalls/linux/sys_poll.go b/pkg/sentry/syscalls/linux/sys_poll.go
index 254f4c9f9..da548a14a 100644
--- a/pkg/sentry/syscalls/linux/sys_poll.go
+++ b/pkg/sentry/syscalls/linux/sys_poll.go
@@ -18,13 +18,13 @@ import (
"time"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/sentry/limits"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -155,7 +155,7 @@ func pollBlock(t *kernel.Task, pfd []linux.PollFD, timeout time.Duration) (time.
}
// CopyInPollFDs copies an array of struct pollfd unless nfds exceeds the max.
-func CopyInPollFDs(t *kernel.Task, addr usermem.Addr, nfds uint) ([]linux.PollFD, error) {
+func CopyInPollFDs(t *kernel.Task, addr hostarch.Addr, nfds uint) ([]linux.PollFD, error) {
if uint64(nfds) > t.ThreadGroup().Limits().GetCapped(limits.NumberOfFiles, fileCap) {
return nil, syserror.EINVAL
}
@@ -170,7 +170,7 @@ func CopyInPollFDs(t *kernel.Task, addr usermem.Addr, nfds uint) ([]linux.PollFD
return pfd, nil
}
-func doPoll(t *kernel.Task, addr usermem.Addr, nfds uint, timeout time.Duration) (time.Duration, uintptr, error) {
+func doPoll(t *kernel.Task, addr hostarch.Addr, nfds uint, timeout time.Duration) (time.Duration, uintptr, error) {
pfd, err := CopyInPollFDs(t, addr, nfds)
if err != nil {
return timeout, 0, err
@@ -198,7 +198,7 @@ func doPoll(t *kernel.Task, addr usermem.Addr, nfds uint, timeout time.Duration)
}
// CopyInFDSet copies an fd set from select(2)/pselect(2).
-func CopyInFDSet(t *kernel.Task, addr usermem.Addr, nBytes, nBitsInLastPartialByte int) ([]byte, error) {
+func CopyInFDSet(t *kernel.Task, addr hostarch.Addr, nBytes, nBitsInLastPartialByte int) ([]byte, error) {
set := make([]byte, nBytes)
if addr != 0 {
@@ -215,7 +215,7 @@ func CopyInFDSet(t *kernel.Task, addr usermem.Addr, nBytes, nBitsInLastPartialBy
return set, nil
}
-func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs usermem.Addr, timeout time.Duration) (uintptr, error) {
+func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs hostarch.Addr, timeout time.Duration) (uintptr, error) {
if nfds < 0 || nfds > fileCap {
return 0, syserror.EINVAL
}
@@ -365,7 +365,7 @@ func timeoutRemaining(t *kernel.Task, startNs ktime.Time, timeout time.Duration)
// copyOutTimespecRemaining copies the time remaining in timeout to timespecAddr.
//
// startNs must be from CLOCK_MONOTONIC.
-func copyOutTimespecRemaining(t *kernel.Task, startNs ktime.Time, timeout time.Duration, timespecAddr usermem.Addr) error {
+func copyOutTimespecRemaining(t *kernel.Task, startNs ktime.Time, timeout time.Duration, timespecAddr hostarch.Addr) error {
if timeout <= 0 {
return nil
}
@@ -377,7 +377,7 @@ func copyOutTimespecRemaining(t *kernel.Task, startNs ktime.Time, timeout time.D
// copyOutTimevalRemaining copies the time remaining in timeout to timevalAddr.
//
// startNs must be from CLOCK_MONOTONIC.
-func copyOutTimevalRemaining(t *kernel.Task, startNs ktime.Time, timeout time.Duration, timevalAddr usermem.Addr) error {
+func copyOutTimevalRemaining(t *kernel.Task, startNs ktime.Time, timeout time.Duration, timevalAddr hostarch.Addr) error {
if timeout <= 0 {
return nil
}
@@ -391,7 +391,7 @@ func copyOutTimevalRemaining(t *kernel.Task, startNs ktime.Time, timeout time.Du
//
// +stateify savable
type pollRestartBlock struct {
- pfdAddr usermem.Addr
+ pfdAddr hostarch.Addr
nfds uint
timeout time.Duration
}
@@ -401,7 +401,7 @@ func (p *pollRestartBlock) Restart(t *kernel.Task) (uintptr, error) {
return poll(t, p.pfdAddr, p.nfds, p.timeout)
}
-func poll(t *kernel.Task, pfdAddr usermem.Addr, nfds uint, timeout time.Duration) (uintptr, error) {
+func poll(t *kernel.Task, pfdAddr hostarch.Addr, nfds uint, timeout time.Duration) (uintptr, error) {
remainingTimeout, n, err := doPoll(t, pfdAddr, nfds, timeout)
// On an interrupt poll(2) is restarted with the remaining timeout.
if err == syserror.EINTR {
diff --git a/pkg/sentry/syscalls/linux/sys_random.go b/pkg/sentry/syscalls/linux/sys_random.go
index c0aa0fd60..ae545f80f 100644
--- a/pkg/sentry/syscalls/linux/sys_random.go
+++ b/pkg/sentry/syscalls/linux/sys_random.go
@@ -24,6 +24,8 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
+
+ "gvisor.dev/gvisor/pkg/hostarch"
)
const (
@@ -64,7 +66,7 @@ func GetRandom(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
if min > 256 {
min = 256
}
- n, err := t.MemoryManager().CopyOutFrom(t, usermem.AddrRangeSeqOf(ar), safemem.FromIOReader{&randReader{-1, min}}, usermem.IOOpts{
+ n, err := t.MemoryManager().CopyOutFrom(t, hostarch.AddrRangeSeqOf(ar), safemem.FromIOReader{&randReader{-1, min}}, usermem.IOOpts{
AddressSpaceActive: true,
})
if n >= int64(min) {
diff --git a/pkg/sentry/syscalls/linux/sys_rlimit.go b/pkg/sentry/syscalls/linux/sys_rlimit.go
index 88cd234d1..e64246d57 100644
--- a/pkg/sentry/syscalls/linux/sys_rlimit.go
+++ b/pkg/sentry/syscalls/linux/sys_rlimit.go
@@ -16,12 +16,12 @@ package linux
import (
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/marshal"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/limits"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
// rlimit describes an implementation of 'struct rlimit', which may vary from
@@ -67,12 +67,12 @@ func (r *rlimit64) fromLimit(lim limits.Limit) {
}
}
-func (r *rlimit64) copyIn(t *kernel.Task, addr usermem.Addr) error {
+func (r *rlimit64) copyIn(t *kernel.Task, addr hostarch.Addr) error {
_, err := r.CopyIn(t, addr)
return err
}
-func (r *rlimit64) copyOut(t *kernel.Task, addr usermem.Addr) error {
+func (r *rlimit64) copyOut(t *kernel.Task, addr hostarch.Addr) error {
_, err := r.CopyOut(t, addr)
return err
}
diff --git a/pkg/sentry/syscalls/linux/sys_seccomp.go b/pkg/sentry/syscalls/linux/sys_seccomp.go
index 4fdb4463c..e16d6ff3f 100644
--- a/pkg/sentry/syscalls/linux/sys_seccomp.go
+++ b/pkg/sentry/syscalls/linux/sys_seccomp.go
@@ -17,10 +17,10 @@ package linux
import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/bpf"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
// userSockFprog is equivalent to Linux's struct sock_fprog on amd64.
@@ -33,14 +33,14 @@ type userSockFprog struct {
_ [6]byte // padding for alignment
// Filter is a user pointer to the struct sock_filter array that makes up
- // the filter program. Filter is a uint64 rather than a usermem.Addr
- // because usermem.Addr is actually uintptr, which is not a fixed-size
+ // the filter program. Filter is a uint64 rather than a hostarch.Addr
+ // because hostarch.Addr is actually uintptr, which is not a fixed-size
// type.
Filter uint64
}
// seccomp applies a seccomp policy to the current task.
-func seccomp(t *kernel.Task, mode, flags uint64, addr usermem.Addr) error {
+func seccomp(t *kernel.Task, mode, flags uint64, addr hostarch.Addr) error {
// We only support SECCOMP_SET_MODE_FILTER at the moment.
if mode != linux.SECCOMP_SET_MODE_FILTER {
// Unsupported mode.
@@ -60,7 +60,7 @@ func seccomp(t *kernel.Task, mode, flags uint64, addr usermem.Addr) error {
return err
}
filter := make([]linux.BPFInstruction, int(fprog.Len))
- if _, err := linux.CopyBPFInstructionSliceIn(t, usermem.Addr(fprog.Filter), filter); err != nil {
+ if _, err := linux.CopyBPFInstructionSliceIn(t, hostarch.Addr(fprog.Filter), filter); err != nil {
return err
}
compiledFilter, err := bpf.Compile(filter)
diff --git a/pkg/sentry/syscalls/linux/sys_sem.go b/pkg/sentry/syscalls/linux/sys_sem.go
index f0570d927..c84260080 100644
--- a/pkg/sentry/syscalls/linux/sys_sem.go
+++ b/pkg/sentry/syscalls/linux/sys_sem.go
@@ -19,13 +19,13 @@ import (
"time"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/marshal/primitive"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
const opsMax = 500 // SEMOPM
@@ -310,7 +310,7 @@ func setVal(t *kernel.Task, id int32, num int32, val int16) error {
return set.SetVal(t, num, val, creds, int32(pid))
}
-func setValAll(t *kernel.Task, id int32, array usermem.Addr) error {
+func setValAll(t *kernel.Task, id int32, array hostarch.Addr) error {
r := t.IPCNamespace().SemaphoreRegistry()
set := r.FindByID(id)
if set == nil {
@@ -335,7 +335,7 @@ func getVal(t *kernel.Task, id int32, num int32) (int16, error) {
return set.GetVal(num, creds)
}
-func getValAll(t *kernel.Task, id int32, array usermem.Addr) error {
+func getValAll(t *kernel.Task, id int32, array hostarch.Addr) error {
r := t.IPCNamespace().SemaphoreRegistry()
set := r.FindByID(id)
if set == nil {
diff --git a/pkg/sentry/syscalls/linux/sys_signal.go b/pkg/sentry/syscalls/linux/sys_signal.go
index d639c9bf7..53b12dc41 100644
--- a/pkg/sentry/syscalls/linux/sys_signal.go
+++ b/pkg/sentry/syscalls/linux/sys_signal.go
@@ -19,12 +19,12 @@ import (
"time"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/signalfd"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
// "For a process to have permission to send a signal it must
@@ -516,7 +516,7 @@ func RestartSyscall(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kerne
}
// sharedSignalfd is shared between the two calls.
-func sharedSignalfd(t *kernel.Task, fd int32, sigset usermem.Addr, sigsetsize uint, flags int32) (uintptr, *kernel.SyscallControl, error) {
+func sharedSignalfd(t *kernel.Task, fd int32, sigset hostarch.Addr, sigsetsize uint, flags int32) (uintptr, *kernel.SyscallControl, error) {
// Copy in the signal mask.
mask, err := CopyInSigSet(t, sigset, sigsetsize)
if err != nil {
diff --git a/pkg/sentry/syscalls/linux/sys_socket.go b/pkg/sentry/syscalls/linux/sys_socket.go
index c6adfe06b..0141e8a96 100644
--- a/pkg/sentry/syscalls/linux/sys_socket.go
+++ b/pkg/sentry/syscalls/linux/sys_socket.go
@@ -18,6 +18,7 @@ import (
"time"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/marshal"
"gvisor.dev/gvisor/pkg/marshal/primitive"
"gvisor.dev/gvisor/pkg/sentry/arch"
@@ -34,12 +35,6 @@ import (
// LINT.IfChange
-// minListenBacklog is the minimum reasonable backlog for listening sockets.
-const minListenBacklog = 8
-
-// maxListenBacklog is the maximum allowed backlog for listening sockets.
-const maxListenBacklog = 1024
-
// maxAddrLen is the maximum socket address length we're willing to accept.
const maxAddrLen = 200
@@ -117,7 +112,7 @@ type multipleMessageHeader64 struct {
// CaptureAddress allocates memory for and copies a socket address structure
// from the untrusted address space range.
-func CaptureAddress(t *kernel.Task, addr usermem.Addr, addrlen uint32) ([]byte, error) {
+func CaptureAddress(t *kernel.Task, addr hostarch.Addr, addrlen uint32) ([]byte, error) {
if addrlen > maxAddrLen {
return nil, syserror.EINVAL
}
@@ -133,7 +128,7 @@ func CaptureAddress(t *kernel.Task, addr usermem.Addr, addrlen uint32) ([]byte,
// writeAddress writes a sockaddr structure and its length to an output buffer
// in the unstrusted address space range. If the address is bigger than the
// buffer, it is truncated.
-func writeAddress(t *kernel.Task, addr linux.SockAddr, addrLen uint32, addrPtr usermem.Addr, addrLenPtr usermem.Addr) error {
+func writeAddress(t *kernel.Task, addr linux.SockAddr, addrLen uint32, addrPtr hostarch.Addr, addrLenPtr hostarch.Addr) error {
// Get the buffer length.
var bufLen uint32
if _, err := primitive.CopyUint32In(t, addrLenPtr, &bufLen); err != nil {
@@ -276,7 +271,7 @@ func Connect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
// accept is the implementation of the accept syscall. It is called by accept
// and accept4 syscall handlers.
-func accept(t *kernel.Task, fd int32, addr usermem.Addr, addrLen usermem.Addr, flags int) (uintptr, error) {
+func accept(t *kernel.Task, fd int32, addr hostarch.Addr, addrLen hostarch.Addr, flags int) (uintptr, error) {
// Check that no unsupported flags are passed in.
if flags & ^(linux.SOCK_NONBLOCK|linux.SOCK_CLOEXEC) != 0 {
return 0, syserror.EINVAL
@@ -381,14 +376,6 @@ func Listen(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
return 0, nil, syserror.ENOTSOCK
}
- // Per Linux, the backlog is silently capped to reasonable values.
- if backlog <= 0 {
- backlog = minListenBacklog
- }
- if backlog > maxListenBacklog {
- backlog = maxListenBacklog
- }
-
return 0, nil, s.Listen(t, int(backlog)).ToError()
}
@@ -472,7 +459,7 @@ func GetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy
// getSockOpt tries to handle common socket options, or dispatches to a specific
// socket implementation.
-func getSockOpt(t *kernel.Task, s socket.Socket, level, name int, optValAddr usermem.Addr, len int) (marshal.Marshallable, *syserr.Error) {
+func getSockOpt(t *kernel.Task, s socket.Socket, level, name int, optValAddr hostarch.Addr, len int) (marshal.Marshallable, *syserr.Error) {
if level == linux.SOL_SOCKET {
switch name {
case linux.SO_TYPE, linux.SO_DOMAIN, linux.SO_PROTOCOL:
@@ -735,7 +722,7 @@ func RecvMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
return uintptr(count), nil, nil
}
-func recvSingleMsg(t *kernel.Task, s socket.Socket, msgPtr usermem.Addr, flags int32, haveDeadline bool, deadline ktime.Time) (uintptr, error) {
+func recvSingleMsg(t *kernel.Task, s socket.Socket, msgPtr hostarch.Addr, flags int32, haveDeadline bool, deadline ktime.Time) (uintptr, error) {
// Capture the message header and io vectors.
var msg MessageHeader64
if _, err := msg.CopyIn(t, msgPtr); err != nil {
@@ -745,7 +732,7 @@ func recvSingleMsg(t *kernel.Task, s socket.Socket, msgPtr usermem.Addr, flags i
if msg.IovLen > linux.UIO_MAXIOV {
return 0, syserror.EMSGSIZE
}
- dst, err := t.IovecsIOSequence(usermem.Addr(msg.Iov), int(msg.IovLen), usermem.IOOpts{
+ dst, err := t.IovecsIOSequence(hostarch.Addr(msg.Iov), int(msg.IovLen), usermem.IOOpts{
AddressSpaceActive: true,
})
if err != nil {
@@ -796,7 +783,7 @@ func recvSingleMsg(t *kernel.Task, s socket.Socket, msgPtr usermem.Addr, flags i
// Copy the address to the caller.
if msg.NameLen != 0 {
- if err := writeAddress(t, sender, senderLen, usermem.Addr(msg.Name), usermem.Addr(msgPtr+nameLenOffset)); err != nil {
+ if err := writeAddress(t, sender, senderLen, hostarch.Addr(msg.Name), hostarch.Addr(msgPtr+nameLenOffset)); err != nil {
return 0, err
}
}
@@ -806,7 +793,7 @@ func recvSingleMsg(t *kernel.Task, s socket.Socket, msgPtr usermem.Addr, flags i
return 0, err
}
if len(controlData) > 0 {
- if _, err := t.CopyOutBytes(usermem.Addr(msg.Control), controlData); err != nil {
+ if _, err := t.CopyOutBytes(hostarch.Addr(msg.Control), controlData); err != nil {
return 0, err
}
}
@@ -821,7 +808,7 @@ func recvSingleMsg(t *kernel.Task, s socket.Socket, msgPtr usermem.Addr, flags i
// recvFrom is the implementation of the recvfrom syscall. It is called by
// recvfrom and recv syscall handlers.
-func recvFrom(t *kernel.Task, fd int32, bufPtr usermem.Addr, bufLen uint64, flags int32, namePtr usermem.Addr, nameLenPtr usermem.Addr) (uintptr, error) {
+func recvFrom(t *kernel.Task, fd int32, bufPtr hostarch.Addr, bufLen uint64, flags int32, namePtr hostarch.Addr, nameLenPtr hostarch.Addr) (uintptr, error) {
if int(bufLen) < 0 {
return 0, syserror.EINVAL
}
@@ -997,7 +984,7 @@ func SendMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
return uintptr(count), nil, nil
}
-func sendSingleMsg(t *kernel.Task, s socket.Socket, file *fs.File, msgPtr usermem.Addr, flags int32) (uintptr, error) {
+func sendSingleMsg(t *kernel.Task, s socket.Socket, file *fs.File, msgPtr hostarch.Addr, flags int32) (uintptr, error) {
// Capture the message header.
var msg MessageHeader64
if _, err := msg.CopyIn(t, msgPtr); err != nil {
@@ -1011,7 +998,7 @@ func sendSingleMsg(t *kernel.Task, s socket.Socket, file *fs.File, msgPtr userme
return 0, syserror.ENOBUFS
}
controlData = make([]byte, msg.ControlLen)
- if _, err := t.CopyInBytes(usermem.Addr(msg.Control), controlData); err != nil {
+ if _, err := t.CopyInBytes(hostarch.Addr(msg.Control), controlData); err != nil {
return 0, err
}
}
@@ -1020,7 +1007,7 @@ func sendSingleMsg(t *kernel.Task, s socket.Socket, file *fs.File, msgPtr userme
var to []byte
if msg.NameLen != 0 {
var err error
- to, err = CaptureAddress(t, usermem.Addr(msg.Name), msg.NameLen)
+ to, err = CaptureAddress(t, hostarch.Addr(msg.Name), msg.NameLen)
if err != nil {
return 0, err
}
@@ -1030,7 +1017,7 @@ func sendSingleMsg(t *kernel.Task, s socket.Socket, file *fs.File, msgPtr userme
if msg.IovLen > linux.UIO_MAXIOV {
return 0, syserror.EMSGSIZE
}
- src, err := t.IovecsIOSequence(usermem.Addr(msg.Iov), int(msg.IovLen), usermem.IOOpts{
+ src, err := t.IovecsIOSequence(hostarch.Addr(msg.Iov), int(msg.IovLen), usermem.IOOpts{
AddressSpaceActive: true,
})
if err != nil {
@@ -1064,7 +1051,7 @@ func sendSingleMsg(t *kernel.Task, s socket.Socket, file *fs.File, msgPtr userme
// sendTo is the implementation of the sendto syscall. It is called by sendto
// and send syscall handlers.
-func sendTo(t *kernel.Task, fd int32, bufPtr usermem.Addr, bufLen uint64, flags int32, namePtr usermem.Addr, nameLen uint32) (uintptr, error) {
+func sendTo(t *kernel.Task, fd int32, bufPtr hostarch.Addr, bufLen uint64, flags int32, namePtr hostarch.Addr, nameLen uint32) (uintptr, error) {
bl := int(bufLen)
if bl < 0 {
return 0, syserror.EINVAL
diff --git a/pkg/sentry/syscalls/linux/sys_stat.go b/pkg/sentry/syscalls/linux/sys_stat.go
index cda29a8b5..2338ba44b 100644
--- a/pkg/sentry/syscalls/linux/sys_stat.go
+++ b/pkg/sentry/syscalls/linux/sys_stat.go
@@ -16,11 +16,11 @@ package linux
import (
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
// LINT.IfChange
@@ -106,7 +106,7 @@ func Fstat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
}
// stat implements stat from the given *fs.Dirent.
-func stat(t *kernel.Task, d *fs.Dirent, dirPath bool, statAddr usermem.Addr) error {
+func stat(t *kernel.Task, d *fs.Dirent, dirPath bool, statAddr hostarch.Addr) error {
if dirPath && !fs.IsDir(d.Inode.StableAttr) {
return syserror.ENOTDIR
}
@@ -120,7 +120,7 @@ func stat(t *kernel.Task, d *fs.Dirent, dirPath bool, statAddr usermem.Addr) err
}
// fstat implements fstat for the given *fs.File.
-func fstat(t *kernel.Task, f *fs.File, statAddr usermem.Addr) error {
+func fstat(t *kernel.Task, f *fs.File, statAddr hostarch.Addr) error {
uattr, err := f.UnstableAttr(t)
if err != nil {
return err
@@ -180,7 +180,7 @@ func Statx(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
})
}
-func statx(t *kernel.Task, sattr fs.StableAttr, uattr fs.UnstableAttr, statxAddr usermem.Addr) error {
+func statx(t *kernel.Task, sattr fs.StableAttr, uattr fs.UnstableAttr, statxAddr hostarch.Addr) error {
// "[T]he kernel may return fields that weren't requested and may fail to
// return fields that were requested, depending on what the backing
// filesystem supports.
@@ -257,7 +257,7 @@ func Fstatfs(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
// statfsImpl implements the linux syscall statfs and fstatfs based on a Dirent,
// copying the statfs structure out to addr on success, otherwise an error is
// returned.
-func statfsImpl(t *kernel.Task, d *fs.Dirent, addr usermem.Addr) error {
+func statfsImpl(t *kernel.Task, d *fs.Dirent, addr hostarch.Addr) error {
info, err := d.Inode.StatFS(t)
if err != nil {
return err
diff --git a/pkg/sentry/syscalls/linux/sys_thread.go b/pkg/sentry/syscalls/linux/sys_thread.go
index b5f920949..3185ea527 100644
--- a/pkg/sentry/syscalls/linux/sys_thread.go
+++ b/pkg/sentry/syscalls/linux/sys_thread.go
@@ -19,6 +19,7 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/marshal/primitive"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/fs"
@@ -46,7 +47,7 @@ var (
ExecMaxTotalSize = 2 * 1024 * 1024
// ExecMaxElemSize is the maximum length of a single argv or envv entry.
- ExecMaxElemSize = 32 * usermem.PageSize
+ ExecMaxElemSize = 32 * hostarch.PageSize
)
// Getppid implements linux syscall getppid(2).
@@ -88,7 +89,7 @@ func Execveat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
return execveat(t, dirFD, pathnameAddr, argvAddr, envvAddr, flags)
}
-func execveat(t *kernel.Task, dirFD int32, pathnameAddr, argvAddr, envvAddr usermem.Addr, flags int32) (uintptr, *kernel.SyscallControl, error) {
+func execveat(t *kernel.Task, dirFD int32, pathnameAddr, argvAddr, envvAddr hostarch.Addr, flags int32) (uintptr, *kernel.SyscallControl, error) {
pathname, err := t.CopyInString(pathnameAddr, linux.PATH_MAX)
if err != nil {
return 0, nil, err
@@ -199,7 +200,7 @@ func ExitGroup(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
}
// clone is used by Clone, Fork, and VFork.
-func clone(t *kernel.Task, flags int, stack usermem.Addr, parentTID usermem.Addr, childTID usermem.Addr, tls usermem.Addr) (uintptr, *kernel.SyscallControl, error) {
+func clone(t *kernel.Task, flags int, stack hostarch.Addr, parentTID hostarch.Addr, childTID hostarch.Addr, tls hostarch.Addr) (uintptr, *kernel.SyscallControl, error) {
opts := kernel.CloneOptions{
SharingOptions: kernel.SharingOptions{
NewAddressSpace: flags&linux.CLONE_VM == 0,
@@ -274,7 +275,7 @@ func parseCommonWaitOptions(wopts *kernel.WaitOptions, options int) error {
}
// wait4 waits for the given child process to exit.
-func wait4(t *kernel.Task, pid int, statusAddr usermem.Addr, options int, rusageAddr usermem.Addr) (uintptr, error) {
+func wait4(t *kernel.Task, pid int, statusAddr hostarch.Addr, options int, rusageAddr hostarch.Addr) (uintptr, error) {
if options&^(linux.WNOHANG|linux.WUNTRACED|linux.WCONTINUED|linux.WNOTHREAD|linux.WALL|linux.WCLONE) != 0 {
return 0, syserror.EINVAL
}
diff --git a/pkg/sentry/syscalls/linux/sys_time.go b/pkg/sentry/syscalls/linux/sys_time.go
index c5054d2f1..83b777bbd 100644
--- a/pkg/sentry/syscalls/linux/sys_time.go
+++ b/pkg/sentry/syscalls/linux/sys_time.go
@@ -19,12 +19,12 @@ import (
"time"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/marshal/primitive"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
// The most significant 29 bits hold either a pid or a file descriptor.
@@ -165,7 +165,7 @@ func Time(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC
addr := args[0].Pointer()
r := t.Kernel().RealtimeClock().Now().TimeT()
- if addr == usermem.Addr(0) {
+ if addr == hostarch.Addr(0) {
return uintptr(r), nil, nil
}
@@ -182,7 +182,7 @@ func Time(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC
type clockNanosleepRestartBlock struct {
c ktime.Clock
duration time.Duration
- rem usermem.Addr
+ rem hostarch.Addr
}
// Restart implements kernel.SyscallRestartBlock.Restart.
@@ -221,7 +221,7 @@ func clockNanosleepUntil(t *kernel.Task, c ktime.Clock, ts linux.Timespec) error
//
// If blocking is interrupted, the syscall is restarted with the remaining
// duration timeout.
-func clockNanosleepFor(t *kernel.Task, c ktime.Clock, dur time.Duration, rem usermem.Addr) error {
+func clockNanosleepFor(t *kernel.Task, c ktime.Clock, dur time.Duration, rem hostarch.Addr) error {
timer, start, tchan := ktime.After(c, dur)
err := t.BlockWithTimer(nil, tchan)
@@ -324,14 +324,14 @@ func Gettimeofday(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.
tv := args[0].Pointer()
tz := args[1].Pointer()
- if tv != usermem.Addr(0) {
+ if tv != hostarch.Addr(0) {
nowTv := t.Kernel().RealtimeClock().Now().Timeval()
if err := copyTimevalOut(t, tv, &nowTv); err != nil {
return 0, nil, err
}
}
- if tz != usermem.Addr(0) {
+ if tz != hostarch.Addr(0) {
// Ask the time package for the timezone.
_, offset := time.Now().Zone()
// This int32 array mimics linux's struct timezone.
diff --git a/pkg/sentry/syscalls/linux/sys_xattr.go b/pkg/sentry/syscalls/linux/sys_xattr.go
index 97474fd3c..28ad6a60e 100644
--- a/pkg/sentry/syscalls/linux/sys_xattr.go
+++ b/pkg/sentry/syscalls/linux/sys_xattr.go
@@ -18,11 +18,11 @@ import (
"strings"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
// LINT.IfChange
@@ -87,7 +87,7 @@ func getXattrFromPath(t *kernel.Task, args arch.SyscallArguments, resolveSymlink
}
// getXattr implements getxattr(2) from the given *fs.Dirent.
-func getXattr(t *kernel.Task, d *fs.Dirent, nameAddr, valueAddr usermem.Addr, size uint64) (int, error) {
+func getXattr(t *kernel.Task, d *fs.Dirent, nameAddr, valueAddr hostarch.Addr, size uint64) (int, error) {
name, err := copyInXattrName(t, nameAddr)
if err != nil {
return 0, err
@@ -180,7 +180,7 @@ func setXattrFromPath(t *kernel.Task, args arch.SyscallArguments, resolveSymlink
}
// setXattr implements setxattr(2) from the given *fs.Dirent.
-func setXattr(t *kernel.Task, d *fs.Dirent, nameAddr, valueAddr usermem.Addr, size uint64, flags uint32) error {
+func setXattr(t *kernel.Task, d *fs.Dirent, nameAddr, valueAddr hostarch.Addr, size uint64, flags uint32) error {
if flags&^(linux.XATTR_CREATE|linux.XATTR_REPLACE) != 0 {
return syserror.EINVAL
}
@@ -214,7 +214,7 @@ func setXattr(t *kernel.Task, d *fs.Dirent, nameAddr, valueAddr usermem.Addr, si
return nil
}
-func copyInXattrName(t *kernel.Task, nameAddr usermem.Addr) (string, error) {
+func copyInXattrName(t *kernel.Task, nameAddr hostarch.Addr) (string, error) {
name, err := t.CopyInString(nameAddr, linux.XATTR_NAME_MAX+1)
if err != nil {
if err == syserror.ENAMETOOLONG {
@@ -306,7 +306,7 @@ func listXattrFromPath(t *kernel.Task, args arch.SyscallArguments, resolveSymlin
return uintptr(n), nil, nil
}
-func listXattr(t *kernel.Task, d *fs.Dirent, addr usermem.Addr, size uint64) (int, error) {
+func listXattr(t *kernel.Task, d *fs.Dirent, addr hostarch.Addr, size uint64) (int, error) {
if !xattrFileTypeOk(d.Inode) {
return 0, nil
}
@@ -408,7 +408,7 @@ func removeXattrFromPath(t *kernel.Task, args arch.SyscallArguments, resolveSyml
}
// removeXattr implements removexattr(2) from the given *fs.Dirent.
-func removeXattr(t *kernel.Task, d *fs.Dirent, nameAddr usermem.Addr) error {
+func removeXattr(t *kernel.Task, d *fs.Dirent, nameAddr hostarch.Addr) error {
name, err := copyInXattrName(t, nameAddr)
if err != nil {
return err
diff --git a/pkg/sentry/syscalls/linux/timespec.go b/pkg/sentry/syscalls/linux/timespec.go
index ddc3ee26e..3edc922eb 100644
--- a/pkg/sentry/syscalls/linux/timespec.go
+++ b/pkg/sentry/syscalls/linux/timespec.go
@@ -18,13 +18,13 @@ import (
"time"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
// copyTimespecIn copies a Timespec from the untrusted app range to the kernel.
-func copyTimespecIn(t *kernel.Task, addr usermem.Addr) (linux.Timespec, error) {
+func copyTimespecIn(t *kernel.Task, addr hostarch.Addr) (linux.Timespec, error) {
switch t.Arch().Width() {
case 8:
ts := linux.Timespec{}
@@ -33,8 +33,8 @@ func copyTimespecIn(t *kernel.Task, addr usermem.Addr) (linux.Timespec, error) {
if err != nil {
return ts, err
}
- ts.Sec = int64(usermem.ByteOrder.Uint64(in[0:]))
- ts.Nsec = int64(usermem.ByteOrder.Uint64(in[8:]))
+ ts.Sec = int64(hostarch.ByteOrder.Uint64(in[0:]))
+ ts.Nsec = int64(hostarch.ByteOrder.Uint64(in[8:]))
return ts, nil
default:
return linux.Timespec{}, syserror.ENOSYS
@@ -42,12 +42,12 @@ func copyTimespecIn(t *kernel.Task, addr usermem.Addr) (linux.Timespec, error) {
}
// copyTimespecOut copies a Timespec to the untrusted app range.
-func copyTimespecOut(t *kernel.Task, addr usermem.Addr, ts *linux.Timespec) error {
+func copyTimespecOut(t *kernel.Task, addr hostarch.Addr, ts *linux.Timespec) error {
switch t.Arch().Width() {
case 8:
out := t.CopyScratchBuffer(16)
- usermem.ByteOrder.PutUint64(out[0:], uint64(ts.Sec))
- usermem.ByteOrder.PutUint64(out[8:], uint64(ts.Nsec))
+ hostarch.ByteOrder.PutUint64(out[0:], uint64(ts.Sec))
+ hostarch.ByteOrder.PutUint64(out[8:], uint64(ts.Nsec))
_, err := t.CopyOutBytes(addr, out)
return err
default:
@@ -56,7 +56,7 @@ func copyTimespecOut(t *kernel.Task, addr usermem.Addr, ts *linux.Timespec) erro
}
// copyTimevalIn copies a Timeval from the untrusted app range to the kernel.
-func copyTimevalIn(t *kernel.Task, addr usermem.Addr) (linux.Timeval, error) {
+func copyTimevalIn(t *kernel.Task, addr hostarch.Addr) (linux.Timeval, error) {
switch t.Arch().Width() {
case 8:
tv := linux.Timeval{}
@@ -65,8 +65,8 @@ func copyTimevalIn(t *kernel.Task, addr usermem.Addr) (linux.Timeval, error) {
if err != nil {
return tv, err
}
- tv.Sec = int64(usermem.ByteOrder.Uint64(in[0:]))
- tv.Usec = int64(usermem.ByteOrder.Uint64(in[8:]))
+ tv.Sec = int64(hostarch.ByteOrder.Uint64(in[0:]))
+ tv.Usec = int64(hostarch.ByteOrder.Uint64(in[8:]))
return tv, nil
default:
return linux.Timeval{}, syserror.ENOSYS
@@ -74,12 +74,12 @@ func copyTimevalIn(t *kernel.Task, addr usermem.Addr) (linux.Timeval, error) {
}
// copyTimevalOut copies a Timeval to the untrusted app range.
-func copyTimevalOut(t *kernel.Task, addr usermem.Addr, tv *linux.Timeval) error {
+func copyTimevalOut(t *kernel.Task, addr hostarch.Addr, tv *linux.Timeval) error {
switch t.Arch().Width() {
case 8:
out := t.CopyScratchBuffer(16)
- usermem.ByteOrder.PutUint64(out[0:], uint64(tv.Sec))
- usermem.ByteOrder.PutUint64(out[8:], uint64(tv.Usec))
+ hostarch.ByteOrder.PutUint64(out[0:], uint64(tv.Sec))
+ hostarch.ByteOrder.PutUint64(out[8:], uint64(tv.Usec))
_, err := t.CopyOutBytes(addr, out)
return err
default:
@@ -94,7 +94,7 @@ func copyTimevalOut(t *kernel.Task, addr usermem.Addr, tv *linux.Timeval) error
// returned value is the maximum that Duration will allow.
//
// If timespecAddr is NULL, the returned value is negative.
-func copyTimespecInToDuration(t *kernel.Task, timespecAddr usermem.Addr) (time.Duration, error) {
+func copyTimespecInToDuration(t *kernel.Task, timespecAddr hostarch.Addr) (time.Duration, error) {
// Use a negative Duration to indicate "no timeout".
timeout := time.Duration(-1)
if timespecAddr != 0 {
diff --git a/pkg/sentry/syscalls/linux/vfs2/BUILD b/pkg/sentry/syscalls/linux/vfs2/BUILD
index 2e59bd5b1..5ce0bc714 100644
--- a/pkg/sentry/syscalls/linux/vfs2/BUILD
+++ b/pkg/sentry/syscalls/linux/vfs2/BUILD
@@ -43,6 +43,7 @@ go_library(
"//pkg/context",
"//pkg/fspath",
"//pkg/gohacks",
+ "//pkg/hostarch",
"//pkg/log",
"//pkg/marshal",
"//pkg/marshal/primitive",
diff --git a/pkg/sentry/syscalls/linux/vfs2/aio.go b/pkg/sentry/syscalls/linux/vfs2/aio.go
index de6789a65..fd1863ef3 100644
--- a/pkg/sentry/syscalls/linux/vfs2/aio.go
+++ b/pkg/sentry/syscalls/linux/vfs2/aio.go
@@ -26,6 +26,8 @@ import (
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
+
+ "gvisor.dev/gvisor/pkg/hostarch"
)
// IoSubmit implements linux syscall io_submit(2).
@@ -40,7 +42,7 @@ func IoSubmit(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
for i := int32(0); i < nrEvents; i++ {
// Copy in the callback address.
- var cbAddr usermem.Addr
+ var cbAddr hostarch.Addr
switch t.Arch().Width() {
case 8:
var cbAddrP primitive.Uint64
@@ -52,7 +54,7 @@ func IoSubmit(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
// Nothing done.
return 0, nil, err
}
- cbAddr = usermem.Addr(cbAddrP)
+ cbAddr = hostarch.Addr(cbAddrP)
default:
return 0, nil, syserror.ENOSYS
}
@@ -79,14 +81,14 @@ func IoSubmit(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
}
// Advance to the next one.
- addr += usermem.Addr(t.Arch().Width())
+ addr += hostarch.Addr(t.Arch().Width())
}
return uintptr(nrEvents), nil, nil
}
// submitCallback processes a single callback.
-func submitCallback(t *kernel.Task, id uint64, cb *linux.IOCallback, cbAddr usermem.Addr) error {
+func submitCallback(t *kernel.Task, id uint64, cb *linux.IOCallback, cbAddr hostarch.Addr) error {
if cb.Reserved2 != 0 {
return syserror.EINVAL
}
@@ -148,7 +150,7 @@ func submitCallback(t *kernel.Task, id uint64, cb *linux.IOCallback, cbAddr user
return nil
}
-func getAIOCallback(t *kernel.Task, fd, eventFD *vfs.FileDescription, cbAddr usermem.Addr, cb *linux.IOCallback, ioseq usermem.IOSequence, aioCtx *mm.AIOContext) kernel.AIOCallback {
+func getAIOCallback(t *kernel.Task, fd, eventFD *vfs.FileDescription, cbAddr hostarch.Addr, cb *linux.IOCallback, ioseq usermem.IOSequence, aioCtx *mm.AIOContext) kernel.AIOCallback {
return func(ctx context.Context) {
// Release references after completing the callback.
defer fd.DecRef(ctx)
@@ -206,12 +208,12 @@ func memoryFor(t *kernel.Task, cb *linux.IOCallback) (usermem.IOSequence, error)
// I/O.
switch cb.OpCode {
case linux.IOCB_CMD_PREAD, linux.IOCB_CMD_PWRITE:
- return t.SingleIOSequence(usermem.Addr(cb.Buf), bytes, usermem.IOOpts{
+ return t.SingleIOSequence(hostarch.Addr(cb.Buf), bytes, usermem.IOOpts{
AddressSpaceActive: false,
})
case linux.IOCB_CMD_PREADV, linux.IOCB_CMD_PWRITEV:
- return t.IovecsIOSequence(usermem.Addr(cb.Buf), bytes, usermem.IOOpts{
+ return t.IovecsIOSequence(hostarch.Addr(cb.Buf), bytes, usermem.IOOpts{
AddressSpaceActive: false,
})
diff --git a/pkg/sentry/syscalls/linux/vfs2/execve.go b/pkg/sentry/syscalls/linux/vfs2/execve.go
index 7a409620d..3315398a4 100644
--- a/pkg/sentry/syscalls/linux/vfs2/execve.go
+++ b/pkg/sentry/syscalls/linux/vfs2/execve.go
@@ -24,7 +24,8 @@ import (
slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
+
+ "gvisor.dev/gvisor/pkg/hostarch"
)
// Execve implements linux syscall execve(2).
@@ -45,7 +46,7 @@ func Execveat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
return execveat(t, dirfd, pathnameAddr, argvAddr, envvAddr, flags)
}
-func execveat(t *kernel.Task, dirfd int32, pathnameAddr, argvAddr, envvAddr usermem.Addr, flags int32) (uintptr, *kernel.SyscallControl, error) {
+func execveat(t *kernel.Task, dirfd int32, pathnameAddr, argvAddr, envvAddr hostarch.Addr, flags int32) (uintptr, *kernel.SyscallControl, error) {
if flags&^(linux.AT_EMPTY_PATH|linux.AT_SYMLINK_NOFOLLOW) != 0 {
return 0, nil, syserror.EINVAL
}
diff --git a/pkg/sentry/syscalls/linux/vfs2/filesystem.go b/pkg/sentry/syscalls/linux/vfs2/filesystem.go
index 01e0f9010..36aa1d3ae 100644
--- a/pkg/sentry/syscalls/linux/vfs2/filesystem.go
+++ b/pkg/sentry/syscalls/linux/vfs2/filesystem.go
@@ -20,7 +20,8 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
+
+ "gvisor.dev/gvisor/pkg/hostarch"
)
// Link implements Linux syscall link(2).
@@ -40,7 +41,7 @@ func Linkat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
return 0, nil, linkat(t, olddirfd, oldpathAddr, newdirfd, newpathAddr, flags)
}
-func linkat(t *kernel.Task, olddirfd int32, oldpathAddr usermem.Addr, newdirfd int32, newpathAddr usermem.Addr, flags int32) error {
+func linkat(t *kernel.Task, olddirfd int32, oldpathAddr hostarch.Addr, newdirfd int32, newpathAddr hostarch.Addr, flags int32) error {
if flags&^(linux.AT_EMPTY_PATH|linux.AT_SYMLINK_FOLLOW) != 0 {
return syserror.EINVAL
}
@@ -86,7 +87,7 @@ func Mkdirat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
return 0, nil, mkdirat(t, dirfd, addr, mode)
}
-func mkdirat(t *kernel.Task, dirfd int32, addr usermem.Addr, mode uint) error {
+func mkdirat(t *kernel.Task, dirfd int32, addr hostarch.Addr, mode uint) error {
path, err := copyInPath(t, addr)
if err != nil {
return err
@@ -118,7 +119,7 @@ func Mknodat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
return 0, nil, mknodat(t, dirfd, addr, linux.FileMode(mode), dev)
}
-func mknodat(t *kernel.Task, dirfd int32, addr usermem.Addr, mode linux.FileMode, dev uint32) error {
+func mknodat(t *kernel.Task, dirfd int32, addr hostarch.Addr, mode linux.FileMode, dev uint32) error {
path, err := copyInPath(t, addr)
if err != nil {
return err
@@ -165,7 +166,7 @@ func Creat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
return openat(t, linux.AT_FDCWD, addr, linux.O_WRONLY|linux.O_CREAT|linux.O_TRUNC, mode)
}
-func openat(t *kernel.Task, dirfd int32, pathAddr usermem.Addr, flags uint32, mode uint) (uintptr, *kernel.SyscallControl, error) {
+func openat(t *kernel.Task, dirfd int32, pathAddr hostarch.Addr, flags uint32, mode uint) (uintptr, *kernel.SyscallControl, error) {
path, err := copyInPath(t, pathAddr)
if err != nil {
return 0, nil, err
@@ -217,7 +218,7 @@ func Renameat2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
return 0, nil, renameat(t, olddirfd, oldpathAddr, newdirfd, newpathAddr, flags)
}
-func renameat(t *kernel.Task, olddirfd int32, oldpathAddr usermem.Addr, newdirfd int32, newpathAddr usermem.Addr, flags uint32) error {
+func renameat(t *kernel.Task, olddirfd int32, oldpathAddr hostarch.Addr, newdirfd int32, newpathAddr hostarch.Addr, flags uint32) error {
oldpath, err := copyInPath(t, oldpathAddr)
if err != nil {
return err
@@ -250,7 +251,7 @@ func Rmdir(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
return 0, nil, rmdirat(t, linux.AT_FDCWD, pathAddr)
}
-func rmdirat(t *kernel.Task, dirfd int32, pathAddr usermem.Addr) error {
+func rmdirat(t *kernel.Task, dirfd int32, pathAddr hostarch.Addr) error {
path, err := copyInPath(t, pathAddr)
if err != nil {
return err
@@ -269,7 +270,7 @@ func Unlink(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
return 0, nil, unlinkat(t, linux.AT_FDCWD, pathAddr)
}
-func unlinkat(t *kernel.Task, dirfd int32, pathAddr usermem.Addr) error {
+func unlinkat(t *kernel.Task, dirfd int32, pathAddr hostarch.Addr) error {
path, err := copyInPath(t, pathAddr)
if err != nil {
return err
@@ -313,7 +314,7 @@ func Symlinkat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
return 0, nil, symlinkat(t, targetAddr, newdirfd, linkpathAddr)
}
-func symlinkat(t *kernel.Task, targetAddr usermem.Addr, newdirfd int32, linkpathAddr usermem.Addr) error {
+func symlinkat(t *kernel.Task, targetAddr hostarch.Addr, newdirfd int32, linkpathAddr hostarch.Addr) error {
target, err := t.CopyInString(targetAddr, linux.PATH_MAX)
if err != nil {
return err
diff --git a/pkg/sentry/syscalls/linux/vfs2/getdents.go b/pkg/sentry/syscalls/linux/vfs2/getdents.go
index 5517595b5..b41a3056a 100644
--- a/pkg/sentry/syscalls/linux/vfs2/getdents.go
+++ b/pkg/sentry/syscalls/linux/vfs2/getdents.go
@@ -22,7 +22,8 @@ import (
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
+
+ "gvisor.dev/gvisor/pkg/hostarch"
)
// Getdents implements Linux syscall getdents(2).
@@ -58,7 +59,7 @@ func getdents(t *kernel.Task, args arch.SyscallArguments, isGetdents64 bool) (ui
type getdentsCallback struct {
t *kernel.Task
- addr usermem.Addr
+ addr hostarch.Addr
remaining int
isGetdents64 bool
}
@@ -69,7 +70,7 @@ var getdentsCallbackPool = sync.Pool{
},
}
-func getGetdentsCallback(t *kernel.Task, addr usermem.Addr, size int, isGetdents64 bool) *getdentsCallback {
+func getGetdentsCallback(t *kernel.Task, addr hostarch.Addr, size int, isGetdents64 bool) *getdentsCallback {
cb := getdentsCallbackPool.Get().(*getdentsCallback)
*cb = getdentsCallback{
t: t,
@@ -102,9 +103,9 @@ func (cb *getdentsCallback) Handle(dirent vfs.Dirent) error {
return syserror.EINVAL
}
buf = cb.t.CopyScratchBuffer(size)
- usermem.ByteOrder.PutUint64(buf[0:8], dirent.Ino)
- usermem.ByteOrder.PutUint64(buf[8:16], uint64(dirent.NextOff))
- usermem.ByteOrder.PutUint16(buf[16:18], uint16(size))
+ hostarch.ByteOrder.PutUint64(buf[0:8], dirent.Ino)
+ hostarch.ByteOrder.PutUint64(buf[8:16], uint64(dirent.NextOff))
+ hostarch.ByteOrder.PutUint16(buf[16:18], uint16(size))
buf[18] = dirent.Type
copy(buf[19:], dirent.Name)
// Zero out all remaining bytes in buf, including the NUL terminator
@@ -136,9 +137,9 @@ func (cb *getdentsCallback) Handle(dirent vfs.Dirent) error {
return syserror.EINVAL
}
buf = cb.t.CopyScratchBuffer(size)
- usermem.ByteOrder.PutUint64(buf[0:8], dirent.Ino)
- usermem.ByteOrder.PutUint64(buf[8:16], uint64(dirent.NextOff))
- usermem.ByteOrder.PutUint16(buf[16:18], uint16(size))
+ hostarch.ByteOrder.PutUint64(buf[0:8], dirent.Ino)
+ hostarch.ByteOrder.PutUint64(buf[8:16], uint64(dirent.NextOff))
+ hostarch.ByteOrder.PutUint16(buf[16:18], uint16(size))
copy(buf[18:], dirent.Name)
// Zero out all remaining bytes in buf, including the NUL terminator
// after dirent.Name and the zero padding byte between the name and
@@ -155,7 +156,7 @@ func (cb *getdentsCallback) Handle(dirent vfs.Dirent) error {
// cb.remaining.
return err
}
- cb.addr += usermem.Addr(n)
+ cb.addr += hostarch.Addr(n)
cb.remaining -= n
return nil
}
diff --git a/pkg/sentry/syscalls/linux/vfs2/mmap.go b/pkg/sentry/syscalls/linux/vfs2/mmap.go
index 9d9dbf775..c961545f6 100644
--- a/pkg/sentry/syscalls/linux/vfs2/mmap.go
+++ b/pkg/sentry/syscalls/linux/vfs2/mmap.go
@@ -21,7 +21,8 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
+
+ "gvisor.dev/gvisor/pkg/hostarch"
)
// Mmap implements Linux syscall mmap(2).
@@ -48,12 +49,12 @@ func Mmap(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC
Unmap: fixed,
Map32Bit: map32bit,
Private: private,
- Perms: usermem.AccessType{
+ Perms: hostarch.AccessType{
Read: linux.PROT_READ&prot != 0,
Write: linux.PROT_WRITE&prot != 0,
Execute: linux.PROT_EXEC&prot != 0,
},
- MaxPerms: usermem.AnyAccess,
+ MaxPerms: hostarch.AnyAccess,
GrowsDown: linux.MAP_GROWSDOWN&flags != 0,
Precommit: linux.MAP_POPULATE&flags != 0,
}
diff --git a/pkg/sentry/syscalls/linux/vfs2/mount.go b/pkg/sentry/syscalls/linux/vfs2/mount.go
index 769c9b92f..dd93430e2 100644
--- a/pkg/sentry/syscalls/linux/vfs2/mount.go
+++ b/pkg/sentry/syscalls/linux/vfs2/mount.go
@@ -20,7 +20,8 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
+
+ "gvisor.dev/gvisor/pkg/hostarch"
)
// Mount implements Linux syscall mount(2).
@@ -33,11 +34,11 @@ func Mount(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
// For null-terminated strings related to mount(2), Linux copies in at most
// a page worth of data. See fs/namespace.c:copy_mount_string().
- fsType, err := t.CopyInString(typeAddr, usermem.PageSize)
+ fsType, err := t.CopyInString(typeAddr, hostarch.PageSize)
if err != nil {
return 0, nil, err
}
- source, err := t.CopyInString(sourceAddr, usermem.PageSize)
+ source, err := t.CopyInString(sourceAddr, hostarch.PageSize)
if err != nil {
return 0, nil, err
}
@@ -53,7 +54,7 @@ func Mount(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
// character placement, and the address is passed to each file system.
// Most file systems always treat this data as a string, though, and so
// do all of the ones we implement.
- data, err = t.CopyInString(dataAddr, usermem.PageSize)
+ data, err = t.CopyInString(dataAddr, hostarch.PageSize)
if err != nil {
return 0, nil, err
}
diff --git a/pkg/sentry/syscalls/linux/vfs2/path.go b/pkg/sentry/syscalls/linux/vfs2/path.go
index 90a511d9a..2aaf1ed74 100644
--- a/pkg/sentry/syscalls/linux/vfs2/path.go
+++ b/pkg/sentry/syscalls/linux/vfs2/path.go
@@ -20,10 +20,11 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
+
+ "gvisor.dev/gvisor/pkg/hostarch"
)
-func copyInPath(t *kernel.Task, addr usermem.Addr) (fspath.Path, error) {
+func copyInPath(t *kernel.Task, addr hostarch.Addr) (fspath.Path, error) {
pathname, err := t.CopyInString(addr, linux.PATH_MAX)
if err != nil {
return fspath.Path{}, err
diff --git a/pkg/sentry/syscalls/linux/vfs2/pipe.go b/pkg/sentry/syscalls/linux/vfs2/pipe.go
index 6986e39fe..c6fc1954c 100644
--- a/pkg/sentry/syscalls/linux/vfs2/pipe.go
+++ b/pkg/sentry/syscalls/linux/vfs2/pipe.go
@@ -22,7 +22,8 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
+
+ "gvisor.dev/gvisor/pkg/hostarch"
)
// Pipe implements Linux syscall pipe(2).
@@ -38,7 +39,7 @@ func Pipe2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
return 0, nil, pipe2(t, addr, flags)
}
-func pipe2(t *kernel.Task, addr usermem.Addr, flags int32) error {
+func pipe2(t *kernel.Task, addr hostarch.Addr, flags int32) error {
if flags&^(linux.O_NONBLOCK|linux.O_CLOEXEC) != 0 {
return syserror.EINVAL
}
diff --git a/pkg/sentry/syscalls/linux/vfs2/poll.go b/pkg/sentry/syscalls/linux/vfs2/poll.go
index c22e4ce54..a69c80edd 100644
--- a/pkg/sentry/syscalls/linux/vfs2/poll.go
+++ b/pkg/sentry/syscalls/linux/vfs2/poll.go
@@ -25,8 +25,9 @@ import (
"gvisor.dev/gvisor/pkg/sentry/limits"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
+
+ "gvisor.dev/gvisor/pkg/hostarch"
)
// fileCap is the maximum allowable files for poll & select. This has no
@@ -158,7 +159,7 @@ func pollBlock(t *kernel.Task, pfd []linux.PollFD, timeout time.Duration) (time.
}
// copyInPollFDs copies an array of struct pollfd unless nfds exceeds the max.
-func copyInPollFDs(t *kernel.Task, addr usermem.Addr, nfds uint) ([]linux.PollFD, error) {
+func copyInPollFDs(t *kernel.Task, addr hostarch.Addr, nfds uint) ([]linux.PollFD, error) {
if uint64(nfds) > t.ThreadGroup().Limits().GetCapped(limits.NumberOfFiles, fileCap) {
return nil, syserror.EINVAL
}
@@ -173,7 +174,7 @@ func copyInPollFDs(t *kernel.Task, addr usermem.Addr, nfds uint) ([]linux.PollFD
return pfd, nil
}
-func doPoll(t *kernel.Task, addr usermem.Addr, nfds uint, timeout time.Duration) (time.Duration, uintptr, error) {
+func doPoll(t *kernel.Task, addr hostarch.Addr, nfds uint, timeout time.Duration) (time.Duration, uintptr, error) {
pfd, err := copyInPollFDs(t, addr, nfds)
if err != nil {
return timeout, 0, err
@@ -201,7 +202,7 @@ func doPoll(t *kernel.Task, addr usermem.Addr, nfds uint, timeout time.Duration)
}
// CopyInFDSet copies an fd set from select(2)/pselect(2).
-func CopyInFDSet(t *kernel.Task, addr usermem.Addr, nBytes, nBitsInLastPartialByte int) ([]byte, error) {
+func CopyInFDSet(t *kernel.Task, addr hostarch.Addr, nBytes, nBitsInLastPartialByte int) ([]byte, error) {
set := make([]byte, nBytes)
if addr != 0 {
@@ -218,7 +219,7 @@ func CopyInFDSet(t *kernel.Task, addr usermem.Addr, nBytes, nBitsInLastPartialBy
return set, nil
}
-func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs usermem.Addr, timeout time.Duration) (uintptr, error) {
+func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs hostarch.Addr, timeout time.Duration) (uintptr, error) {
if nfds < 0 || nfds > fileCap {
return 0, syserror.EINVAL
}
@@ -368,7 +369,7 @@ func timeoutRemaining(t *kernel.Task, startNs ktime.Time, timeout time.Duration)
// copyOutTimespecRemaining copies the time remaining in timeout to timespecAddr.
//
// startNs must be from CLOCK_MONOTONIC.
-func copyOutTimespecRemaining(t *kernel.Task, startNs ktime.Time, timeout time.Duration, timespecAddr usermem.Addr) error {
+func copyOutTimespecRemaining(t *kernel.Task, startNs ktime.Time, timeout time.Duration, timespecAddr hostarch.Addr) error {
if timeout <= 0 {
return nil
}
@@ -381,7 +382,7 @@ func copyOutTimespecRemaining(t *kernel.Task, startNs ktime.Time, timeout time.D
// copyOutTimevalRemaining copies the time remaining in timeout to timevalAddr.
//
// startNs must be from CLOCK_MONOTONIC.
-func copyOutTimevalRemaining(t *kernel.Task, startNs ktime.Time, timeout time.Duration, timevalAddr usermem.Addr) error {
+func copyOutTimevalRemaining(t *kernel.Task, startNs ktime.Time, timeout time.Duration, timevalAddr hostarch.Addr) error {
if timeout <= 0 {
return nil
}
@@ -396,7 +397,7 @@ func copyOutTimevalRemaining(t *kernel.Task, startNs ktime.Time, timeout time.Du
//
// +stateify savable
type pollRestartBlock struct {
- pfdAddr usermem.Addr
+ pfdAddr hostarch.Addr
nfds uint
timeout time.Duration
}
@@ -406,7 +407,7 @@ func (p *pollRestartBlock) Restart(t *kernel.Task) (uintptr, error) {
return poll(t, p.pfdAddr, p.nfds, p.timeout)
}
-func poll(t *kernel.Task, pfdAddr usermem.Addr, nfds uint, timeout time.Duration) (uintptr, error) {
+func poll(t *kernel.Task, pfdAddr hostarch.Addr, nfds uint, timeout time.Duration) (uintptr, error) {
remainingTimeout, n, err := doPoll(t, pfdAddr, nfds, timeout)
// On an interrupt poll(2) is restarted with the remaining timeout.
if err == syserror.EINTR {
@@ -530,7 +531,7 @@ func Pselect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
if _, err := maskStruct.CopyIn(t, maskWithSizeAddr); err != nil {
return 0, nil, err
}
- if err := setTempSignalSet(t, usermem.Addr(maskStruct.sigsetAddr), uint(maskStruct.sizeofSigset)); err != nil {
+ if err := setTempSignalSet(t, hostarch.Addr(maskStruct.sigsetAddr), uint(maskStruct.sizeofSigset)); err != nil {
return 0, nil, err
}
}
@@ -551,7 +552,7 @@ func Pselect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
// returned value is the maximum that Duration will allow.
//
// If timespecAddr is NULL, the returned value is negative.
-func copyTimespecInToDuration(t *kernel.Task, timespecAddr usermem.Addr) (time.Duration, error) {
+func copyTimespecInToDuration(t *kernel.Task, timespecAddr hostarch.Addr) (time.Duration, error) {
// Use a negative Duration to indicate "no timeout".
timeout := time.Duration(-1)
if timespecAddr != 0 {
@@ -567,7 +568,7 @@ func copyTimespecInToDuration(t *kernel.Task, timespecAddr usermem.Addr) (time.D
return timeout, nil
}
-func setTempSignalSet(t *kernel.Task, maskAddr usermem.Addr, maskSize uint) error {
+func setTempSignalSet(t *kernel.Task, maskAddr hostarch.Addr, maskSize uint) error {
if maskAddr == 0 {
return nil
}
diff --git a/pkg/sentry/syscalls/linux/vfs2/setstat.go b/pkg/sentry/syscalls/linux/vfs2/setstat.go
index 903169dc2..c6330c21a 100644
--- a/pkg/sentry/syscalls/linux/vfs2/setstat.go
+++ b/pkg/sentry/syscalls/linux/vfs2/setstat.go
@@ -23,7 +23,8 @@ import (
"gvisor.dev/gvisor/pkg/sentry/limits"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
+
+ "gvisor.dev/gvisor/pkg/hostarch"
)
const chmodMask = 0777 | linux.S_ISUID | linux.S_ISGID | linux.S_ISVTX
@@ -43,7 +44,7 @@ func Fchmodat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
return 0, nil, fchmodat(t, dirfd, pathAddr, mode)
}
-func fchmodat(t *kernel.Task, dirfd int32, pathAddr usermem.Addr, mode uint) error {
+func fchmodat(t *kernel.Task, dirfd int32, pathAddr hostarch.Addr, mode uint) error {
path, err := copyInPath(t, pathAddr)
if err != nil {
return err
@@ -102,7 +103,7 @@ func Fchownat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
return 0, nil, fchownat(t, dirfd, pathAddr, owner, group, flags)
}
-func fchownat(t *kernel.Task, dirfd int32, pathAddr usermem.Addr, owner, group, flags int32) error {
+func fchownat(t *kernel.Task, dirfd int32, pathAddr hostarch.Addr, owner, group, flags int32) error {
if flags&^(linux.AT_EMPTY_PATH|linux.AT_SYMLINK_NOFOLLOW) != 0 {
return syserror.EINVAL
}
@@ -327,7 +328,7 @@ func Futimesat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
return 0, nil, setstatat(t, dirfd, path, shouldAllowEmptyPath, followFinalSymlink, &opts)
}
-func populateSetStatOptionsForUtimes(t *kernel.Task, timesAddr usermem.Addr, opts *vfs.SetStatOptions) error {
+func populateSetStatOptionsForUtimes(t *kernel.Task, timesAddr hostarch.Addr, opts *vfs.SetStatOptions) error {
if timesAddr == 0 {
opts.Stat.Mask = linux.STATX_ATIME | linux.STATX_MTIME
opts.Stat.Atime.Nsec = linux.UTIME_NOW
@@ -391,7 +392,7 @@ func Utimensat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
return 0, nil, setstatat(t, dirfd, path, shouldAllowEmptyPath, shouldFollowFinalSymlink(flags&linux.AT_SYMLINK_NOFOLLOW == 0), &opts)
}
-func populateSetStatOptionsForUtimens(t *kernel.Task, timesAddr usermem.Addr, opts *vfs.SetStatOptions) error {
+func populateSetStatOptionsForUtimens(t *kernel.Task, timesAddr hostarch.Addr, opts *vfs.SetStatOptions) error {
if timesAddr == 0 {
opts.Stat.Mask = linux.STATX_ATIME | linux.STATX_MTIME
opts.Stat.Atime.Nsec = linux.UTIME_NOW
diff --git a/pkg/sentry/syscalls/linux/vfs2/signal.go b/pkg/sentry/syscalls/linux/vfs2/signal.go
index b89f34cdb..6163da103 100644
--- a/pkg/sentry/syscalls/linux/vfs2/signal.go
+++ b/pkg/sentry/syscalls/linux/vfs2/signal.go
@@ -21,11 +21,12 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel"
slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
+
+ "gvisor.dev/gvisor/pkg/hostarch"
)
// sharedSignalfd is shared between the two calls.
-func sharedSignalfd(t *kernel.Task, fd int32, sigset usermem.Addr, sigsetsize uint, flags int32) (uintptr, *kernel.SyscallControl, error) {
+func sharedSignalfd(t *kernel.Task, fd int32, sigset hostarch.Addr, sigsetsize uint, flags int32) (uintptr, *kernel.SyscallControl, error) {
// Copy in the signal mask.
mask, err := slinux.CopyInSigSet(t, sigset, sigsetsize)
if err != nil {
diff --git a/pkg/sentry/syscalls/linux/vfs2/socket.go b/pkg/sentry/syscalls/linux/vfs2/socket.go
index 346fd1cea..7cc0be892 100644
--- a/pkg/sentry/syscalls/linux/vfs2/socket.go
+++ b/pkg/sentry/syscalls/linux/vfs2/socket.go
@@ -31,13 +31,9 @@ import (
"gvisor.dev/gvisor/pkg/syserr"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
-)
-
-// minListenBacklog is the minimum reasonable backlog for listening sockets.
-const minListenBacklog = 8
-// maxListenBacklog is the maximum allowed backlog for listening sockets.
-const maxListenBacklog = 1024
+ "gvisor.dev/gvisor/pkg/hostarch"
+)
// maxAddrLen is the maximum socket address length we're willing to accept.
const maxAddrLen = 200
@@ -116,7 +112,7 @@ type multipleMessageHeader64 struct {
// CaptureAddress allocates memory for and copies a socket address structure
// from the untrusted address space range.
-func CaptureAddress(t *kernel.Task, addr usermem.Addr, addrlen uint32) ([]byte, error) {
+func CaptureAddress(t *kernel.Task, addr hostarch.Addr, addrlen uint32) ([]byte, error) {
if addrlen > maxAddrLen {
return nil, syserror.EINVAL
}
@@ -132,7 +128,7 @@ func CaptureAddress(t *kernel.Task, addr usermem.Addr, addrlen uint32) ([]byte,
// writeAddress writes a sockaddr structure and its length to an output buffer
// in the unstrusted address space range. If the address is bigger than the
// buffer, it is truncated.
-func writeAddress(t *kernel.Task, addr linux.SockAddr, addrLen uint32, addrPtr usermem.Addr, addrLenPtr usermem.Addr) error {
+func writeAddress(t *kernel.Task, addr linux.SockAddr, addrLen uint32, addrPtr hostarch.Addr, addrLenPtr hostarch.Addr) error {
// Get the buffer length.
var bufLen uint32
if _, err := primitive.CopyUint32In(t, addrLenPtr, &bufLen); err != nil {
@@ -279,7 +275,7 @@ func Connect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
// accept is the implementation of the accept syscall. It is called by accept
// and accept4 syscall handlers.
-func accept(t *kernel.Task, fd int32, addr usermem.Addr, addrLen usermem.Addr, flags int) (uintptr, error) {
+func accept(t *kernel.Task, fd int32, addr hostarch.Addr, addrLen hostarch.Addr, flags int) (uintptr, error) {
// Check that no unsupported flags are passed in.
if flags & ^(linux.SOCK_NONBLOCK|linux.SOCK_CLOEXEC) != 0 {
return 0, syserror.EINVAL
@@ -384,14 +380,6 @@ func Listen(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
return 0, nil, syserror.ENOTSOCK
}
- // Per Linux, the backlog is silently capped to reasonable values.
- if backlog <= 0 {
- backlog = minListenBacklog
- }
- if backlog > maxListenBacklog {
- backlog = maxListenBacklog
- }
-
return 0, nil, s.Listen(t, int(backlog)).ToError()
}
@@ -475,7 +463,7 @@ func GetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy
// getSockOpt tries to handle common socket options, or dispatches to a specific
// socket implementation.
-func getSockOpt(t *kernel.Task, s socket.SocketVFS2, level, name int, optValAddr usermem.Addr, len int) (marshal.Marshallable, *syserr.Error) {
+func getSockOpt(t *kernel.Task, s socket.SocketVFS2, level, name int, optValAddr hostarch.Addr, len int) (marshal.Marshallable, *syserr.Error) {
if level == linux.SOL_SOCKET {
switch name {
case linux.SO_TYPE, linux.SO_DOMAIN, linux.SO_PROTOCOL:
@@ -738,7 +726,7 @@ func RecvMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
return uintptr(count), nil, nil
}
-func recvSingleMsg(t *kernel.Task, s socket.SocketVFS2, msgPtr usermem.Addr, flags int32, haveDeadline bool, deadline ktime.Time) (uintptr, error) {
+func recvSingleMsg(t *kernel.Task, s socket.SocketVFS2, msgPtr hostarch.Addr, flags int32, haveDeadline bool, deadline ktime.Time) (uintptr, error) {
// Capture the message header and io vectors.
var msg MessageHeader64
if _, err := msg.CopyIn(t, msgPtr); err != nil {
@@ -748,7 +736,7 @@ func recvSingleMsg(t *kernel.Task, s socket.SocketVFS2, msgPtr usermem.Addr, fla
if msg.IovLen > linux.UIO_MAXIOV {
return 0, syserror.EMSGSIZE
}
- dst, err := t.IovecsIOSequence(usermem.Addr(msg.Iov), int(msg.IovLen), usermem.IOOpts{
+ dst, err := t.IovecsIOSequence(hostarch.Addr(msg.Iov), int(msg.IovLen), usermem.IOOpts{
AddressSpaceActive: true,
})
if err != nil {
@@ -799,7 +787,7 @@ func recvSingleMsg(t *kernel.Task, s socket.SocketVFS2, msgPtr usermem.Addr, fla
// Copy the address to the caller.
if msg.NameLen != 0 {
- if err := writeAddress(t, sender, senderLen, usermem.Addr(msg.Name), usermem.Addr(msgPtr+nameLenOffset)); err != nil {
+ if err := writeAddress(t, sender, senderLen, hostarch.Addr(msg.Name), hostarch.Addr(msgPtr+nameLenOffset)); err != nil {
return 0, err
}
}
@@ -809,7 +797,7 @@ func recvSingleMsg(t *kernel.Task, s socket.SocketVFS2, msgPtr usermem.Addr, fla
return 0, err
}
if len(controlData) > 0 {
- if _, err := t.CopyOutBytes(usermem.Addr(msg.Control), controlData); err != nil {
+ if _, err := t.CopyOutBytes(hostarch.Addr(msg.Control), controlData); err != nil {
return 0, err
}
}
@@ -824,7 +812,7 @@ func recvSingleMsg(t *kernel.Task, s socket.SocketVFS2, msgPtr usermem.Addr, fla
// recvFrom is the implementation of the recvfrom syscall. It is called by
// recvfrom and recv syscall handlers.
-func recvFrom(t *kernel.Task, fd int32, bufPtr usermem.Addr, bufLen uint64, flags int32, namePtr usermem.Addr, nameLenPtr usermem.Addr) (uintptr, error) {
+func recvFrom(t *kernel.Task, fd int32, bufPtr hostarch.Addr, bufLen uint64, flags int32, namePtr hostarch.Addr, nameLenPtr hostarch.Addr) (uintptr, error) {
if int(bufLen) < 0 {
return 0, syserror.EINVAL
}
@@ -1000,7 +988,7 @@ func SendMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
return uintptr(count), nil, nil
}
-func sendSingleMsg(t *kernel.Task, s socket.SocketVFS2, file *vfs.FileDescription, msgPtr usermem.Addr, flags int32) (uintptr, error) {
+func sendSingleMsg(t *kernel.Task, s socket.SocketVFS2, file *vfs.FileDescription, msgPtr hostarch.Addr, flags int32) (uintptr, error) {
// Capture the message header.
var msg MessageHeader64
if _, err := msg.CopyIn(t, msgPtr); err != nil {
@@ -1014,7 +1002,7 @@ func sendSingleMsg(t *kernel.Task, s socket.SocketVFS2, file *vfs.FileDescriptio
return 0, syserror.ENOBUFS
}
controlData = make([]byte, msg.ControlLen)
- if _, err := t.CopyInBytes(usermem.Addr(msg.Control), controlData); err != nil {
+ if _, err := t.CopyInBytes(hostarch.Addr(msg.Control), controlData); err != nil {
return 0, err
}
}
@@ -1023,7 +1011,7 @@ func sendSingleMsg(t *kernel.Task, s socket.SocketVFS2, file *vfs.FileDescriptio
var to []byte
if msg.NameLen != 0 {
var err error
- to, err = CaptureAddress(t, usermem.Addr(msg.Name), msg.NameLen)
+ to, err = CaptureAddress(t, hostarch.Addr(msg.Name), msg.NameLen)
if err != nil {
return 0, err
}
@@ -1033,7 +1021,7 @@ func sendSingleMsg(t *kernel.Task, s socket.SocketVFS2, file *vfs.FileDescriptio
if msg.IovLen > linux.UIO_MAXIOV {
return 0, syserror.EMSGSIZE
}
- src, err := t.IovecsIOSequence(usermem.Addr(msg.Iov), int(msg.IovLen), usermem.IOOpts{
+ src, err := t.IovecsIOSequence(hostarch.Addr(msg.Iov), int(msg.IovLen), usermem.IOOpts{
AddressSpaceActive: true,
})
if err != nil {
@@ -1067,7 +1055,7 @@ func sendSingleMsg(t *kernel.Task, s socket.SocketVFS2, file *vfs.FileDescriptio
// sendTo is the implementation of the sendto syscall. It is called by sendto
// and send syscall handlers.
-func sendTo(t *kernel.Task, fd int32, bufPtr usermem.Addr, bufLen uint64, flags int32, namePtr usermem.Addr, nameLen uint32) (uintptr, error) {
+func sendTo(t *kernel.Task, fd int32, bufPtr hostarch.Addr, bufLen uint64, flags int32, namePtr hostarch.Addr, nameLen uint32) (uintptr, error) {
bl := int(bufLen)
if bl < 0 {
return 0, syserror.EINVAL
diff --git a/pkg/sentry/syscalls/linux/vfs2/stat.go b/pkg/sentry/syscalls/linux/vfs2/stat.go
index 0f5d5189c..69e77fa99 100644
--- a/pkg/sentry/syscalls/linux/vfs2/stat.go
+++ b/pkg/sentry/syscalls/linux/vfs2/stat.go
@@ -24,7 +24,8 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
+
+ "gvisor.dev/gvisor/pkg/hostarch"
)
// Stat implements Linux syscall stat(2).
@@ -50,7 +51,7 @@ func Newfstatat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy
return 0, nil, fstatat(t, dirfd, pathAddr, statAddr, flags)
}
-func fstatat(t *kernel.Task, dirfd int32, pathAddr, statAddr usermem.Addr, flags int32) error {
+func fstatat(t *kernel.Task, dirfd int32, pathAddr, statAddr hostarch.Addr, flags int32) error {
if flags&^(linux.AT_EMPTY_PATH|linux.AT_SYMLINK_NOFOLLOW) != 0 {
return syserror.EINVAL
}
@@ -264,7 +265,7 @@ func Faccessat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
return 0, nil, accessAt(t, dirfd, addr, mode)
}
-func accessAt(t *kernel.Task, dirfd int32, pathAddr usermem.Addr, mode uint) error {
+func accessAt(t *kernel.Task, dirfd int32, pathAddr hostarch.Addr, mode uint) error {
const rOK = 4
const wOK = 2
const xOK = 1
@@ -312,7 +313,7 @@ func Readlinkat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy
return readlinkat(t, dirfd, pathAddr, bufAddr, size)
}
-func readlinkat(t *kernel.Task, dirfd int32, pathAddr, bufAddr usermem.Addr, size uint) (uintptr, *kernel.SyscallControl, error) {
+func readlinkat(t *kernel.Task, dirfd int32, pathAddr, bufAddr hostarch.Addr, size uint) (uintptr, *kernel.SyscallControl, error) {
if int(size) <= 0 {
return 0, nil, syserror.EINVAL
}
diff --git a/pkg/sentry/syscalls/linux/vfs2/xattr.go b/pkg/sentry/syscalls/linux/vfs2/xattr.go
index e05723ef9..c261050c6 100644
--- a/pkg/sentry/syscalls/linux/vfs2/xattr.go
+++ b/pkg/sentry/syscalls/linux/vfs2/xattr.go
@@ -23,7 +23,8 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
+
+ "gvisor.dev/gvisor/pkg/hostarch"
)
// ListXattr implements Linux syscall listxattr(2).
@@ -291,7 +292,7 @@ func Fremovexattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.
return 0, nil, file.RemoveXattr(t, name)
}
-func copyInXattrName(t *kernel.Task, nameAddr usermem.Addr) (string, error) {
+func copyInXattrName(t *kernel.Task, nameAddr hostarch.Addr) (string, error) {
name, err := t.CopyInString(nameAddr, linux.XATTR_NAME_MAX+1)
if err != nil {
if err == syserror.ENAMETOOLONG {
@@ -305,7 +306,7 @@ func copyInXattrName(t *kernel.Task, nameAddr usermem.Addr) (string, error) {
return name, nil
}
-func copyOutXattrNameList(t *kernel.Task, listAddr usermem.Addr, size uint, names []string) (int, error) {
+func copyOutXattrNameList(t *kernel.Task, listAddr hostarch.Addr, size uint, names []string) (int, error) {
if size > linux.XATTR_LIST_MAX {
size = linux.XATTR_LIST_MAX
}
@@ -327,7 +328,7 @@ func copyOutXattrNameList(t *kernel.Task, listAddr usermem.Addr, size uint, name
return t.CopyOutBytes(listAddr, buf.Bytes())
}
-func copyInXattrValue(t *kernel.Task, valueAddr usermem.Addr, size uint) (string, error) {
+func copyInXattrValue(t *kernel.Task, valueAddr hostarch.Addr, size uint) (string, error) {
if size > linux.XATTR_SIZE_MAX {
return "", syserror.E2BIG
}
@@ -338,7 +339,7 @@ func copyInXattrValue(t *kernel.Task, valueAddr usermem.Addr, size uint) (string
return gohacks.StringFromImmutableBytes(buf), nil
}
-func copyOutXattrValue(t *kernel.Task, valueAddr usermem.Addr, size uint, value string) (int, error) {
+func copyOutXattrValue(t *kernel.Task, valueAddr hostarch.Addr, size uint, value string) (int, error) {
if size > linux.XATTR_SIZE_MAX {
size = linux.XATTR_SIZE_MAX
}
diff --git a/pkg/sentry/vfs/BUILD b/pkg/sentry/vfs/BUILD
index df4990854..ac60fe8bf 100644
--- a/pkg/sentry/vfs/BUILD
+++ b/pkg/sentry/vfs/BUILD
@@ -99,6 +99,7 @@ go_library(
"//pkg/fdnotifier",
"//pkg/fspath",
"//pkg/gohacks",
+ "//pkg/hostarch",
"//pkg/log",
"//pkg/refs",
"//pkg/refsvfs2",
diff --git a/pkg/sentry/vfs/anonfs.go b/pkg/sentry/vfs/anonfs.go
index 3caf417ca..f48817132 100644
--- a/pkg/sentry/vfs/anonfs.go
+++ b/pkg/sentry/vfs/anonfs.go
@@ -20,10 +20,10 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fspath"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
// NewAnonVirtualDentry returns a VirtualDentry with the given synthetic name,
@@ -43,7 +43,7 @@ func (vfs *VirtualFilesystem) NewAnonVirtualDentry(name string) VirtualDentry {
}
const (
- anonfsBlockSize = usermem.PageSize // via fs/libfs.c:pseudo_fs_fill_super()
+ anonfsBlockSize = hostarch.PageSize // via fs/libfs.c:pseudo_fs_fill_super()
// Mode, UID, and GID for a generic anonfs file.
anonFileMode = 0600 // no type is correct
diff --git a/pkg/sentry/vfs/file_description_impl_util.go b/pkg/sentry/vfs/file_description_impl_util.go
index 1556b41a3..b87d9690a 100644
--- a/pkg/sentry/vfs/file_description_impl_util.go
+++ b/pkg/sentry/vfs/file_description_impl_util.go
@@ -252,6 +252,9 @@ type WritableDynamicBytesSource interface {
// are backed by a bytes.Buffer that is regenerated when necessary, consistent
// with Linux's fs/seq_file.c:single_open().
//
+// If data additionally implements WritableDynamicBytesSource, writes are
+// dispatched to the implementer. The source data is not automatically modified.
+//
// DynamicBytesFileDescriptionImpl.SetDataSource() must be called before first
// use.
//
diff --git a/pkg/sentry/vfs/filesystem_impl_util.go b/pkg/sentry/vfs/filesystem_impl_util.go
index 2620cf975..15b234d61 100644
--- a/pkg/sentry/vfs/filesystem_impl_util.go
+++ b/pkg/sentry/vfs/filesystem_impl_util.go
@@ -18,7 +18,7 @@ import (
"strings"
"gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/usermem"
+ "gvisor.dev/gvisor/pkg/hostarch"
)
// GenericParseMountOptions parses a comma-separated list of options of the
@@ -50,7 +50,7 @@ func GenericParseMountOptions(str string) map[string]string {
func GenericStatFS(fsMagic uint64) linux.Statfs {
return linux.Statfs{
Type: fsMagic,
- BlockSize: usermem.PageSize,
+ BlockSize: hostarch.PageSize,
NameLength: linux.NAME_MAX,
}
}
diff --git a/pkg/sentry/vfs/inotify.go b/pkg/sentry/vfs/inotify.go
index 32fa01578..49d29e20b 100644
--- a/pkg/sentry/vfs/inotify.go
+++ b/pkg/sentry/vfs/inotify.go
@@ -21,6 +21,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/uniqueid"
"gvisor.dev/gvisor/pkg/sync"
@@ -256,7 +257,7 @@ func (i *Inotify) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallAr
n += uint32(e.sizeOf())
}
var buf [4]byte
- usermem.ByteOrder.PutUint32(buf[:], n)
+ hostarch.ByteOrder.PutUint32(buf[:], n)
_, err := uio.CopyOut(ctx, args[2].Pointer(), buf[:], usermem.IOOpts{})
return 0, err
@@ -683,10 +684,10 @@ func (e *Event) sizeOf() int {
// construct the output. We use a buffer allocated ahead of time for
// performance. buf must be at least inotifyEventBaseSize bytes.
func (e *Event) CopyTo(ctx context.Context, buf []byte, dst usermem.IOSequence) (int64, error) {
- usermem.ByteOrder.PutUint32(buf[0:], uint32(e.wd))
- usermem.ByteOrder.PutUint32(buf[4:], e.mask)
- usermem.ByteOrder.PutUint32(buf[8:], e.cookie)
- usermem.ByteOrder.PutUint32(buf[12:], e.len)
+ hostarch.ByteOrder.PutUint32(buf[0:], uint32(e.wd))
+ hostarch.ByteOrder.PutUint32(buf[4:], e.mask)
+ hostarch.ByteOrder.PutUint32(buf[8:], e.cookie)
+ hostarch.ByteOrder.PutUint32(buf[12:], e.len)
writeLen := 0
diff --git a/pkg/sentry/vfs/mount.go b/pkg/sentry/vfs/mount.go
index 922f9e697..7cdab6945 100644
--- a/pkg/sentry/vfs/mount.go
+++ b/pkg/sentry/vfs/mount.go
@@ -970,17 +970,22 @@ func superBlockOpts(mountPath string, mnt *Mount) string {
opts += "," + mopts
}
- // NOTE(b/147673608): If the mount is a cgroup, we also need to include
- // the cgroup name in the options. For now we just read that from the
- // path.
+ // NOTE(b/147673608): If the mount is a ramdisk-based fake cgroupfs, we also
+ // need to include the cgroup name in the options. For now we just read that
+ // from the path. Note that this is only possible when "cgroup" isn't
+ // registered as a valid filesystem type.
//
- // TODO(gvisor.dev/issue/190): Once gVisor has full cgroup support, we
- // should get this value from the cgroup itself, and not rely on the
- // path.
+ // TODO(gvisor.dev/issue/190): Once we remove fake cgroupfs support, we
+ // should remove this.
+ if cgroupfs := mnt.vfs.getFilesystemType("cgroup"); cgroupfs != nil && cgroupfs.opts.AllowUserMount {
+ // Real cgroupfs available.
+ return opts
+ }
if mnt.fs.FilesystemType().Name() == "cgroup" {
splitPath := strings.Split(mountPath, "/")
cgroupType := splitPath[len(splitPath)-1]
opts += "," + cgroupType
}
+
return opts
}
diff --git a/pkg/tcpip/header/ipv4.go b/pkg/tcpip/header/ipv4.go
index f588311e0..85bd164cd 100644
--- a/pkg/tcpip/header/ipv4.go
+++ b/pkg/tcpip/header/ipv4.go
@@ -178,6 +178,26 @@ const (
IPv4FlagDontFragment
)
+// ipv4LinkLocalUnicastSubnet is the IPv4 link local unicast subnet as defined
+// by RFC 3927 section 1.
+var ipv4LinkLocalUnicastSubnet = func() tcpip.Subnet {
+ subnet, err := tcpip.NewSubnet("\xa9\xfe\x00\x00", tcpip.AddressMask("\xff\xff\x00\x00"))
+ if err != nil {
+ panic(err)
+ }
+ return subnet
+}()
+
+// ipv4LinkLocalMulticastSubnet is the IPv4 link local multicast subnet as
+// defined by RFC 5771 section 4.
+var ipv4LinkLocalMulticastSubnet = func() tcpip.Subnet {
+ subnet, err := tcpip.NewSubnet("\xe0\x00\x00\x00", tcpip.AddressMask("\xff\xff\xff\x00"))
+ if err != nil {
+ panic(err)
+ }
+ return subnet
+}()
+
// IPv4EmptySubnet is the empty IPv4 subnet.
var IPv4EmptySubnet = func() tcpip.Subnet {
subnet, err := tcpip.NewSubnet(IPv4Any, tcpip.AddressMask(IPv4Any))
@@ -423,6 +443,18 @@ func (b IPv4) IsValid(pktSize int) bool {
return true
}
+// IsV4LinkLocalUnicastAddress determines if the provided address is an IPv4
+// link-local unicast address.
+func IsV4LinkLocalUnicastAddress(addr tcpip.Address) bool {
+ return ipv4LinkLocalUnicastSubnet.Contains(addr)
+}
+
+// IsV4LinkLocalMulticastAddress determines if the provided address is an IPv4
+// link-local multicast address.
+func IsV4LinkLocalMulticastAddress(addr tcpip.Address) bool {
+ return ipv4LinkLocalMulticastSubnet.Contains(addr)
+}
+
// IsV4MulticastAddress determines if the provided address is an IPv4 multicast
// address (range 224.0.0.0 to 239.255.255.255). The four most significant bits
// will be 1110 = 0xe0.
diff --git a/pkg/tcpip/header/ipv4_test.go b/pkg/tcpip/header/ipv4_test.go
index 6475cd694..c02fe898b 100644
--- a/pkg/tcpip/header/ipv4_test.go
+++ b/pkg/tcpip/header/ipv4_test.go
@@ -18,6 +18,7 @@ import (
"testing"
"github.com/google/go-cmp/cmp"
+ "gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
"gvisor.dev/gvisor/pkg/tcpip/header"
)
@@ -177,3 +178,77 @@ func TestIPv4EncodeOptions(t *testing.T) {
})
}
}
+
+func TestIsV4LinkLocalUnicastAddress(t *testing.T) {
+ tests := []struct {
+ name string
+ addr tcpip.Address
+ expected bool
+ }{
+ {
+ name: "Valid (lowest)",
+ addr: "\xa9\xfe\x00\x00",
+ expected: true,
+ },
+ {
+ name: "Valid (highest)",
+ addr: "\xa9\xfe\xff\xff",
+ expected: true,
+ },
+ {
+ name: "Invalid (before subnet)",
+ addr: "\xa9\xfd\xff\xff",
+ expected: false,
+ },
+ {
+ name: "Invalid (after subnet)",
+ addr: "\xa9\xff\x00\x00",
+ expected: false,
+ },
+ }
+
+ for _, test := range tests {
+ t.Run(test.name, func(t *testing.T) {
+ if got := header.IsV4LinkLocalUnicastAddress(test.addr); got != test.expected {
+ t.Errorf("got header.IsV4LinkLocalUnicastAddress(%s) = %t, want = %t", test.addr, got, test.expected)
+ }
+ })
+ }
+}
+
+func TestIsV4LinkLocalMulticastAddress(t *testing.T) {
+ tests := []struct {
+ name string
+ addr tcpip.Address
+ expected bool
+ }{
+ {
+ name: "Valid (lowest)",
+ addr: "\xe0\x00\x00\x00",
+ expected: true,
+ },
+ {
+ name: "Valid (highest)",
+ addr: "\xe0\x00\x00\xff",
+ expected: true,
+ },
+ {
+ name: "Invalid (before subnet)",
+ addr: "\xdf\xff\xff\xff",
+ expected: false,
+ },
+ {
+ name: "Invalid (after subnet)",
+ addr: "\xe0\x00\x01\x00",
+ expected: false,
+ },
+ }
+
+ for _, test := range tests {
+ t.Run(test.name, func(t *testing.T) {
+ if got := header.IsV4LinkLocalMulticastAddress(test.addr); got != test.expected {
+ t.Errorf("got header.IsV4LinkLocalMulticastAddress(%s) = %t, want = %t", test.addr, got, test.expected)
+ }
+ })
+ }
+}
diff --git a/pkg/tcpip/header/ipv6.go b/pkg/tcpip/header/ipv6.go
index f2403978c..fa6ccff30 100644
--- a/pkg/tcpip/header/ipv6.go
+++ b/pkg/tcpip/header/ipv6.go
@@ -98,12 +98,27 @@ const (
// The address is ff02::1.
IPv6AllNodesMulticastAddress tcpip.Address = "\xff\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01"
- // IPv6AllRoutersMulticastAddress is a link-local multicast group that
- // all IPv6 routers MUST join, as per RFC 4291, section 2.8. Packets
+ // IPv6AllRoutersInterfaceLocalMulticastAddress is an interface-local
+ // multicast group that all IPv6 routers MUST join, as per RFC 4291, section
+ // 2.8. Packets destined to this address will reach the router on an
+ // interface.
+ //
+ // The address is ff01::2.
+ IPv6AllRoutersInterfaceLocalMulticastAddress tcpip.Address = "\xff\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02"
+
+ // IPv6AllRoutersLinkLocalMulticastAddress is a link-local multicast group
+ // that all IPv6 routers MUST join, as per RFC 4291, section 2.8. Packets
// destined to this address will reach all routers on a link.
//
// The address is ff02::2.
- IPv6AllRoutersMulticastAddress tcpip.Address = "\xff\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02"
+ IPv6AllRoutersLinkLocalMulticastAddress tcpip.Address = "\xff\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02"
+
+ // IPv6AllRoutersSiteLocalMulticastAddress is a site-local multicast group
+ // that all IPv6 routers MUST join, as per RFC 4291, section 2.8. Packets
+ // destined to this address will reach all routers in a site.
+ //
+ // The address is ff05::2.
+ IPv6AllRoutersSiteLocalMulticastAddress tcpip.Address = "\xff\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02"
// IPv6MinimumMTU is the minimum MTU required by IPv6, per RFC 8200,
// section 5:
@@ -142,11 +157,6 @@ const (
// ipv6MulticastAddressScopeMask is the mask for the scope (scop) field,
// within the byte holding the field, as per RFC 4291 section 2.7.
ipv6MulticastAddressScopeMask = 0xF
-
- // ipv6LinkLocalMulticastScope is the value of the scope (scop) field within
- // a multicast IPv6 address that indicates the address has link-local scope,
- // as per RFC 4291 section 2.7.
- ipv6LinkLocalMulticastScope = 2
)
// IPv6EmptySubnet is the empty IPv6 subnet. It may also be known as the
@@ -399,7 +409,7 @@ func IsV6LoopbackAddress(addr tcpip.Address) bool {
// IsV6LinkLocalMulticastAddress determines if the provided address is an IPv6
// link-local multicast address.
func IsV6LinkLocalMulticastAddress(addr tcpip.Address) bool {
- return IsV6MulticastAddress(addr) && addr[ipv6MulticastAddressScopeByteIdx]&ipv6MulticastAddressScopeMask == ipv6LinkLocalMulticastScope
+ return IsV6MulticastAddress(addr) && V6MulticastScope(addr) == IPv6LinkLocalMulticastScope
}
// AppendOpaqueInterfaceIdentifier appends a 64 bit opaque interface identifier
@@ -520,3 +530,45 @@ func GenerateTempIPv6SLAACAddr(tempIIDHistory []byte, stableAddr tcpip.Address)
PrefixLen: IIDOffsetInIPv6Address * 8,
}
}
+
+// IPv6MulticastScope is the scope of a multicast IPv6 address.
+type IPv6MulticastScope uint8
+
+// The various values for IPv6 multicast scopes, as per RFC 7346 section 2:
+//
+// +------+--------------------------+-------------------------+
+// | scop | NAME | REFERENCE |
+// +------+--------------------------+-------------------------+
+// | 0 | Reserved | [RFC4291], RFC 7346 |
+// | 1 | Interface-Local scope | [RFC4291], RFC 7346 |
+// | 2 | Link-Local scope | [RFC4291], RFC 7346 |
+// | 3 | Realm-Local scope | [RFC4291], RFC 7346 |
+// | 4 | Admin-Local scope | [RFC4291], RFC 7346 |
+// | 5 | Site-Local scope | [RFC4291], RFC 7346 |
+// | 6 | Unassigned | |
+// | 7 | Unassigned | |
+// | 8 | Organization-Local scope | [RFC4291], RFC 7346 |
+// | 9 | Unassigned | |
+// | A | Unassigned | |
+// | B | Unassigned | |
+// | C | Unassigned | |
+// | D | Unassigned | |
+// | E | Global scope | [RFC4291], RFC 7346 |
+// | F | Reserved | [RFC4291], RFC 7346 |
+// +------+--------------------------+-------------------------+
+const (
+ IPv6Reserved0MulticastScope = IPv6MulticastScope(0x0)
+ IPv6InterfaceLocalMulticastScope = IPv6MulticastScope(0x1)
+ IPv6LinkLocalMulticastScope = IPv6MulticastScope(0x2)
+ IPv6RealmLocalMulticastScope = IPv6MulticastScope(0x3)
+ IPv6AdminLocalMulticastScope = IPv6MulticastScope(0x4)
+ IPv6SiteLocalMulticastScope = IPv6MulticastScope(0x5)
+ IPv6OrganizationLocalMulticastScope = IPv6MulticastScope(0x8)
+ IPv6GlobalMulticastScope = IPv6MulticastScope(0xE)
+ IPv6ReservedFMulticastScope = IPv6MulticastScope(0xF)
+)
+
+// V6MulticastScope returns the scope of a multicast address.
+func V6MulticastScope(addr tcpip.Address) IPv6MulticastScope {
+ return IPv6MulticastScope(addr[ipv6MulticastAddressScopeByteIdx] & ipv6MulticastAddressScopeMask)
+}
diff --git a/pkg/tcpip/header/ipv6_test.go b/pkg/tcpip/header/ipv6_test.go
index f10f446a6..38b6dbc18 100644
--- a/pkg/tcpip/header/ipv6_test.go
+++ b/pkg/tcpip/header/ipv6_test.go
@@ -373,3 +373,83 @@ func TestSolicitedNodeAddr(t *testing.T) {
})
}
}
+
+func TestV6MulticastScope(t *testing.T) {
+ tests := []struct {
+ addr tcpip.Address
+ want header.IPv6MulticastScope
+ }{
+ {
+ addr: "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01",
+ want: header.IPv6Reserved0MulticastScope,
+ },
+ {
+ addr: "\xff\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01",
+ want: header.IPv6InterfaceLocalMulticastScope,
+ },
+ {
+ addr: "\xff\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01",
+ want: header.IPv6LinkLocalMulticastScope,
+ },
+ {
+ addr: "\xff\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01",
+ want: header.IPv6RealmLocalMulticastScope,
+ },
+ {
+ addr: "\xff\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01",
+ want: header.IPv6AdminLocalMulticastScope,
+ },
+ {
+ addr: "\xff\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01",
+ want: header.IPv6SiteLocalMulticastScope,
+ },
+ {
+ addr: "\xff\x06\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01",
+ want: header.IPv6MulticastScope(6),
+ },
+ {
+ addr: "\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01",
+ want: header.IPv6MulticastScope(7),
+ },
+ {
+ addr: "\xff\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01",
+ want: header.IPv6OrganizationLocalMulticastScope,
+ },
+ {
+ addr: "\xff\x09\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01",
+ want: header.IPv6MulticastScope(9),
+ },
+ {
+ addr: "\xff\x0a\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01",
+ want: header.IPv6MulticastScope(10),
+ },
+ {
+ addr: "\xff\x0b\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01",
+ want: header.IPv6MulticastScope(11),
+ },
+ {
+ addr: "\xff\x0c\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01",
+ want: header.IPv6MulticastScope(12),
+ },
+ {
+ addr: "\xff\x0d\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01",
+ want: header.IPv6MulticastScope(13),
+ },
+ {
+ addr: "\xff\x0e\x06\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01",
+ want: header.IPv6GlobalMulticastScope,
+ },
+ {
+ addr: "\xff\x0f\x06\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01",
+ want: header.IPv6ReservedFMulticastScope,
+ },
+ }
+
+ for _, test := range tests {
+ t.Run(fmt.Sprintf("%s", test.addr), func(t *testing.T) {
+ if got := header.V6MulticastScope(test.addr); got != test.want {
+ t.Fatalf("got header.V6MulticastScope(%s) = %d, want = %d", test.addr, got, test.want)
+ }
+ })
+ }
+}
diff --git a/pkg/tcpip/network/internal/ip/generic_multicast_protocol.go b/pkg/tcpip/network/internal/ip/generic_multicast_protocol.go
index b9f129728..ac35d81e7 100644
--- a/pkg/tcpip/network/internal/ip/generic_multicast_protocol.go
+++ b/pkg/tcpip/network/internal/ip/generic_multicast_protocol.go
@@ -156,14 +156,6 @@ type GenericMulticastProtocolOptions struct {
//
// Unsolicited reports are transmitted when a group is newly joined.
MaxUnsolicitedReportDelay time.Duration
-
- // AllNodesAddress is a multicast address that all nodes on a network should
- // be a member of.
- //
- // This address will not have the generic multicast protocol performed on it;
- // it will be left in the non member/listener state, and packets will never
- // be sent for it.
- AllNodesAddress tcpip.Address
}
// MulticastGroupProtocol is a multicast group protocol whose core state machine
@@ -188,6 +180,10 @@ type MulticastGroupProtocol interface {
// SendLeave sends a multicast leave for the specified group address.
SendLeave(groupAddress tcpip.Address) tcpip.Error
+
+ // ShouldPerformProtocol returns true iff the protocol should be performed for
+ // the specified group.
+ ShouldPerformProtocol(tcpip.Address) bool
}
// GenericMulticastProtocolState is the per interface generic multicast protocol
@@ -455,20 +451,7 @@ func (g *GenericMulticastProtocolState) initializeNewMemberLocked(groupAddress t
info.lastToSendReport = false
- if groupAddress == g.opts.AllNodesAddress {
- // As per RFC 2236 section 6 page 10 (for IGMPv2),
- //
- // The all-systems group (address 224.0.0.1) is handled as a special
- // case. The host starts in Idle Member state for that group on every
- // interface, never transitions to another state, and never sends a
- // report for that group.
- //
- // As per RFC 2710 section 5 page 10 (for MLDv1),
- //
- // The link-scope all-nodes address (FF02::1) is handled as a special
- // case. The node starts in Idle Listener state for that address on
- // every interface, never transitions to another state, and never sends
- // a Report or Done for that address.
+ if !g.opts.Protocol.ShouldPerformProtocol(groupAddress) {
info.state = idleMember
return
}
@@ -537,20 +520,7 @@ func (g *GenericMulticastProtocolState) maybeSendLeave(groupAddress tcpip.Addres
return
}
- if groupAddress == g.opts.AllNodesAddress {
- // As per RFC 2236 section 6 page 10 (for IGMPv2),
- //
- // The all-systems group (address 224.0.0.1) is handled as a special
- // case. The host starts in Idle Member state for that group on every
- // interface, never transitions to another state, and never sends a
- // report for that group.
- //
- // As per RFC 2710 section 5 page 10 (for MLDv1),
- //
- // The link-scope all-nodes address (FF02::1) is handled as a special
- // case. The node starts in Idle Listener state for that address on
- // every interface, never transitions to another state, and never sends
- // a Report or Done for that address.
+ if !g.opts.Protocol.ShouldPerformProtocol(groupAddress) {
return
}
@@ -627,20 +597,7 @@ func (g *GenericMulticastProtocolState) setDelayTimerForAddressRLocked(groupAddr
return
}
- if groupAddress == g.opts.AllNodesAddress {
- // As per RFC 2236 section 6 page 10 (for IGMPv2),
- //
- // The all-systems group (address 224.0.0.1) is handled as a special
- // case. The host starts in Idle Member state for that group on every
- // interface, never transitions to another state, and never sends a
- // report for that group.
- //
- // As per RFC 2710 section 5 page 10 (for MLDv1),
- //
- // The link-scope all-nodes address (FF02::1) is handled as a special
- // case. The node starts in Idle Listener state for that address on
- // every interface, never transitions to another state, and never sends
- // a Report or Done for that address.
+ if !g.opts.Protocol.ShouldPerformProtocol(groupAddress) {
return
}
diff --git a/pkg/tcpip/network/internal/ip/generic_multicast_protocol_test.go b/pkg/tcpip/network/internal/ip/generic_multicast_protocol_test.go
index 381460c82..0b51563cd 100644
--- a/pkg/tcpip/network/internal/ip/generic_multicast_protocol_test.go
+++ b/pkg/tcpip/network/internal/ip/generic_multicast_protocol_test.go
@@ -43,6 +43,8 @@ type mockMulticastGroupProtocolProtectedFields struct {
type mockMulticastGroupProtocol struct {
t *testing.T
+ skipProtocolAddress tcpip.Address
+
mu mockMulticastGroupProtocolProtectedFields
}
@@ -165,6 +167,11 @@ func (m *mockMulticastGroupProtocol) SendLeave(groupAddress tcpip.Address) tcpip
return nil
}
+// ShouldPerformProtocol implements ip.MulticastGroupProtocol.
+func (m *mockMulticastGroupProtocol) ShouldPerformProtocol(groupAddress tcpip.Address) bool {
+ return groupAddress != m.skipProtocolAddress
+}
+
func (m *mockMulticastGroupProtocol) check(sendReportGroupAddresses []tcpip.Address, sendLeaveGroupAddresses []tcpip.Address) string {
m.mu.Lock()
defer m.mu.Unlock()
@@ -193,10 +200,11 @@ func (m *mockMulticastGroupProtocol) check(sendReportGroupAddresses []tcpip.Addr
cmp.FilterPath(
func(p cmp.Path) bool {
switch p.Last().String() {
- case ".RWMutex", ".t", ".makeQueuePackets", ".disabled", ".genericMulticastGroup":
+ case ".RWMutex", ".t", ".makeQueuePackets", ".disabled", ".genericMulticastGroup", ".skipProtocolAddress":
return true
+ default:
+ return false
}
- return false
},
cmp.Ignore(),
),
@@ -225,14 +233,13 @@ func TestJoinGroup(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
- mgp := mockMulticastGroupProtocol{t: t}
+ mgp := mockMulticastGroupProtocol{t: t, skipProtocolAddress: addr2}
clock := faketime.NewManualClock()
mgp.init(ip.GenericMulticastProtocolOptions{
Rand: rand.New(rand.NewSource(0)),
Clock: clock,
MaxUnsolicitedReportDelay: maxUnsolicitedReportDelay,
- AllNodesAddress: addr2,
})
// Joining a group should send a report immediately and another after
@@ -279,14 +286,13 @@ func TestLeaveGroup(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
- mgp := mockMulticastGroupProtocol{t: t}
+ mgp := mockMulticastGroupProtocol{t: t, skipProtocolAddress: addr2}
clock := faketime.NewManualClock()
mgp.init(ip.GenericMulticastProtocolOptions{
Rand: rand.New(rand.NewSource(1)),
Clock: clock,
MaxUnsolicitedReportDelay: maxUnsolicitedReportDelay,
- AllNodesAddress: addr2,
})
mgp.joinGroup(test.addr)
@@ -356,14 +362,13 @@ func TestHandleReport(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
- mgp := mockMulticastGroupProtocol{t: t}
+ mgp := mockMulticastGroupProtocol{t: t, skipProtocolAddress: addr3}
clock := faketime.NewManualClock()
mgp.init(ip.GenericMulticastProtocolOptions{
Rand: rand.New(rand.NewSource(2)),
Clock: clock,
MaxUnsolicitedReportDelay: maxUnsolicitedReportDelay,
- AllNodesAddress: addr3,
})
mgp.joinGroup(addr1)
@@ -446,14 +451,13 @@ func TestHandleQuery(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
- mgp := mockMulticastGroupProtocol{t: t}
+ mgp := mockMulticastGroupProtocol{t: t, skipProtocolAddress: addr3}
clock := faketime.NewManualClock()
mgp.init(ip.GenericMulticastProtocolOptions{
Rand: rand.New(rand.NewSource(3)),
Clock: clock,
MaxUnsolicitedReportDelay: maxUnsolicitedReportDelay,
- AllNodesAddress: addr3,
})
mgp.joinGroup(addr1)
@@ -574,14 +578,13 @@ func TestJoinCount(t *testing.T) {
}
func TestMakeAllNonMemberAndInitialize(t *testing.T) {
- mgp := mockMulticastGroupProtocol{t: t}
+ mgp := mockMulticastGroupProtocol{t: t, skipProtocolAddress: addr3}
clock := faketime.NewManualClock()
mgp.init(ip.GenericMulticastProtocolOptions{
Rand: rand.New(rand.NewSource(3)),
Clock: clock,
MaxUnsolicitedReportDelay: maxUnsolicitedReportDelay,
- AllNodesAddress: addr3,
})
mgp.joinGroup(addr1)
diff --git a/pkg/tcpip/network/ip_test.go b/pkg/tcpip/network/ip_test.go
index a4edc69c7..58fd18af8 100644
--- a/pkg/tcpip/network/ip_test.go
+++ b/pkg/tcpip/network/ip_test.go
@@ -15,6 +15,7 @@
package ip_test
import (
+ "fmt"
"strings"
"testing"
@@ -1938,3 +1939,80 @@ func TestICMPInclusionSize(t *testing.T) {
})
}
}
+
+func TestJoinLeaveAllRoutersGroup(t *testing.T) {
+ const nicID = 1
+
+ tests := []struct {
+ name string
+ netProto tcpip.NetworkProtocolNumber
+ protoFactory stack.NetworkProtocolFactory
+ allRoutersAddr tcpip.Address
+ }{
+ {
+ name: "IPv4",
+ netProto: ipv4.ProtocolNumber,
+ protoFactory: ipv4.NewProtocol,
+ allRoutersAddr: header.IPv4AllRoutersGroup,
+ },
+ {
+ name: "IPv6 Interface Local",
+ netProto: ipv6.ProtocolNumber,
+ protoFactory: ipv6.NewProtocol,
+ allRoutersAddr: header.IPv6AllRoutersInterfaceLocalMulticastAddress,
+ },
+ {
+ name: "IPv6 Link Local",
+ netProto: ipv6.ProtocolNumber,
+ protoFactory: ipv6.NewProtocol,
+ allRoutersAddr: header.IPv6AllRoutersLinkLocalMulticastAddress,
+ },
+ {
+ name: "IPv6 Site Local",
+ netProto: ipv6.ProtocolNumber,
+ protoFactory: ipv6.NewProtocol,
+ allRoutersAddr: header.IPv6AllRoutersSiteLocalMulticastAddress,
+ },
+ }
+
+ for _, test := range tests {
+ t.Run(test.name, func(t *testing.T) {
+ for _, nicDisabled := range [...]bool{true, false} {
+ t.Run(fmt.Sprintf("NIC Disabled = %t", nicDisabled), func(t *testing.T) {
+ s := stack.New(stack.Options{
+ NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol, ipv6.NewProtocol},
+ TransportProtocols: []stack.TransportProtocolFactory{udp.NewProtocol, tcp.NewProtocol},
+ })
+ opts := stack.NICOptions{Disabled: nicDisabled}
+ if err := s.CreateNICWithOptions(nicID, channel.New(0, 0, ""), opts); err != nil {
+ t.Fatalf("CreateNICWithOptions(%d, _, %#v) = %s", nicID, opts, err)
+ }
+
+ if got, err := s.IsInGroup(nicID, test.allRoutersAddr); err != nil {
+ t.Fatalf("s.IsInGroup(%d, %s): %s", nicID, test.allRoutersAddr, err)
+ } else if got {
+ t.Fatalf("got s.IsInGroup(%d, %s) = true, want = false", nicID, test.allRoutersAddr)
+ }
+
+ if err := s.SetForwarding(test.netProto, true); err != nil {
+ t.Fatalf("s.SetForwarding(%d, true): %s", test.netProto, err)
+ }
+ if got, err := s.IsInGroup(nicID, test.allRoutersAddr); err != nil {
+ t.Fatalf("s.IsInGroup(%d, %s): %s", nicID, test.allRoutersAddr, err)
+ } else if !got {
+ t.Fatalf("got s.IsInGroup(%d, %s) = false, want = true", nicID, test.allRoutersAddr)
+ }
+
+ if err := s.SetForwarding(test.netProto, false); err != nil {
+ t.Fatalf("s.SetForwarding(%d, false): %s", test.netProto, err)
+ }
+ if got, err := s.IsInGroup(nicID, test.allRoutersAddr); err != nil {
+ t.Fatalf("s.IsInGroup(%d, %s): %s", nicID, test.allRoutersAddr, err)
+ } else if got {
+ t.Fatalf("got s.IsInGroup(%d, %s) = true, want = false", nicID, test.allRoutersAddr)
+ }
+ })
+ }
+ })
+ }
+}
diff --git a/pkg/tcpip/network/ipv4/igmp.go b/pkg/tcpip/network/ipv4/igmp.go
index f3fc1c87e..b1ac29294 100644
--- a/pkg/tcpip/network/ipv4/igmp.go
+++ b/pkg/tcpip/network/ipv4/igmp.go
@@ -126,6 +126,17 @@ func (igmp *igmpState) SendLeave(groupAddress tcpip.Address) tcpip.Error {
return err
}
+// ShouldPerformProtocol implements ip.MulticastGroupProtocol.
+func (igmp *igmpState) ShouldPerformProtocol(groupAddress tcpip.Address) bool {
+ // As per RFC 2236 section 6 page 10,
+ //
+ // The all-systems group (address 224.0.0.1) is handled as a special
+ // case. The host starts in Idle Member state for that group on every
+ // interface, never transitions to another state, and never sends a
+ // report for that group.
+ return groupAddress != header.IPv4AllSystems
+}
+
// init sets up an igmpState struct, and is required to be called before using
// a new igmpState.
//
@@ -137,7 +148,6 @@ func (igmp *igmpState) init(ep *endpoint) {
Clock: ep.protocol.stack.Clock(),
Protocol: igmp,
MaxUnsolicitedReportDelay: UnsolicitedReportIntervalMax,
- AllNodesAddress: header.IPv4AllSystems,
})
igmp.igmpV1Present = igmpV1PresentDefault
igmp.igmpV1Job = ep.protocol.stack.NewJob(&ep.mu, func() {
diff --git a/pkg/tcpip/network/ipv4/ipv4.go b/pkg/tcpip/network/ipv4/ipv4.go
index 1a5661ca4..2e44f8523 100644
--- a/pkg/tcpip/network/ipv4/ipv4.go
+++ b/pkg/tcpip/network/ipv4/ipv4.go
@@ -150,6 +150,38 @@ func (p *protocol) forgetEndpoint(nicID tcpip.NICID) {
delete(p.mu.eps, nicID)
}
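+// transitionForwarding transitions the endpoint's forwarding status to the
+// value of the forwarding argument, joining the IPv4 all-routers multicast
+// group when forwarding is enabled and leaving it when forwarding is disabled.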
+//
+// Must only be called when the forwarding status changes.
+func (e *endpoint) transitionForwarding(forwarding bool) {
+ e.mu.Lock()
+ defer e.mu.Unlock()
+
+ if forwarding {
+ // There does not seem to be an RFC requirement for a node to join the all
+ // routers multicast address but
+ // https://www.iana.org/assignments/multicast-addresses/multicast-addresses.xhtml
+ // specifies the address as a group for all routers on a subnet so we join
+ // the group here.
+ if err := e.joinGroupLocked(header.IPv4AllRoutersGroup); err != nil {
+ // joinGroupLocked only returns an error if the group address is not a
+ // valid IPv4 multicast address.
+ panic(fmt.Sprintf("e.joinGroupLocked(%s): %s", header.IPv4AllRoutersGroup, err))
+ }
+
+ return
+ }
+
+ switch err := e.leaveGroupLocked(header.IPv4AllRoutersGroup).(type) {
+ case nil:
+ case *tcpip.ErrBadLocalAddress:
+ // The endpoint may have already left the multicast group.
+ default:
+ panic(fmt.Sprintf("e.leaveGroupLocked(%s): %s", header.IPv4AllRoutersGroup, err))
+ }
+}
+
// Enable implements stack.NetworkEndpoint.
func (e *endpoint) Enable() tcpip.Error {
e.mu.Lock()
@@ -226,7 +258,7 @@ func (e *endpoint) disableLocked() {
}
// The endpoint may have already left the multicast group.
- switch err := e.leaveGroupLocked(header.IPv4AllSystems); err.(type) {
+ switch err := e.leaveGroupLocked(header.IPv4AllSystems).(type) {
case nil, *tcpip.ErrBadLocalAddress:
default:
panic(fmt.Sprintf("unexpected error when leaving group = %s: %s", header.IPv4AllSystems, err))
@@ -551,6 +583,22 @@ func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack.PacketBu
// forwardPacket attempts to forward a packet to its final destination.
func (e *endpoint) forwardPacket(pkt *stack.PacketBuffer) tcpip.Error {
h := header.IPv4(pkt.NetworkHeader().View())
+
+ dstAddr := h.DestinationAddress()
+ if header.IsV4LinkLocalUnicastAddress(h.SourceAddress()) || header.IsV4LinkLocalUnicastAddress(dstAddr) || header.IsV4LinkLocalMulticastAddress(dstAddr) {
+ // As per RFC 3927 section 7,
+ //
+ // A router MUST NOT forward a packet with an IPv4 Link-Local source or
+ // destination address, irrespective of the router's default route
+ // configuration or routes obtained from dynamic routing protocols.
+ //
+ // A router which receives a packet with an IPv4 Link-Local source or
+ // destination address MUST NOT forward the packet. This prevents
+ // forwarding of packets back onto the network segment from which they
+ // originated, or to any other segment.
+ return nil
+ }
+
ttl := h.TTL()
if ttl == 0 {
// As per RFC 792 page 6, Time Exceeded Message,
@@ -589,8 +637,6 @@ func (e *endpoint) forwardPacket(pkt *stack.PacketBuffer) tcpip.Error {
}
}
- dstAddr := h.DestinationAddress()
-
// Check if the destination is owned by the stack.
if ep := e.protocol.findEndpointWithAddress(dstAddr); ep != nil {
ep.handleValidatedPacket(h, pkt)
@@ -1168,12 +1214,27 @@ func (p *protocol) Forwarding() bool {
return uint8(atomic.LoadUint32(&p.forwarding)) == 1
}
+// setForwarding sets the forwarding status for the protocol.
+//
+// Returns true if the forwarding status was updated.
+func (p *protocol) setForwarding(v bool) bool {
+ if v {
+ return atomic.CompareAndSwapUint32(&p.forwarding, 0 /* old */, 1 /* new */)
+ }
+ return atomic.CompareAndSwapUint32(&p.forwarding, 1 /* old */, 0 /* new */)
+}
+
// SetForwarding implements stack.ForwardingNetworkProtocol.
func (p *protocol) SetForwarding(v bool) {
- if v {
- atomic.StoreUint32(&p.forwarding, 1)
- } else {
- atomic.StoreUint32(&p.forwarding, 0)
+ p.mu.Lock()
+ defer p.mu.Unlock()
+
+ if !p.setForwarding(v) {
+ return
+ }
+
+ for _, ep := range p.mu.eps {
+ ep.transitionForwarding(v)
}
}
diff --git a/pkg/tcpip/network/ipv6/ipv6.go b/pkg/tcpip/network/ipv6/ipv6.go
index c6d9d8f0d..7ee7be0f9 100644
--- a/pkg/tcpip/network/ipv6/ipv6.go
+++ b/pkg/tcpip/network/ipv6/ipv6.go
@@ -410,22 +410,65 @@ func (e *endpoint) dupTentativeAddrDetected(addr tcpip.Address, holderLinkAddr t
//
// Must only be called when the forwarding status changes.
func (e *endpoint) transitionForwarding(forwarding bool) {
+ allRoutersGroups := [...]tcpip.Address{
+ header.IPv6AllRoutersInterfaceLocalMulticastAddress,
+ header.IPv6AllRoutersLinkLocalMulticastAddress,
+ header.IPv6AllRoutersSiteLocalMulticastAddress,
+ }
+
e.mu.Lock()
defer e.mu.Unlock()
- if !e.Enabled() {
- return
- }
-
if forwarding {
// When transitioning into an IPv6 router, host-only state (NDP discovered
// routers, discovered on-link prefixes, and auto-generated addresses) is
// cleaned up/invalidated and NDP router solicitations are stopped.
e.mu.ndp.stopSolicitingRouters()
e.mu.ndp.cleanupState(true /* hostOnly */)
- } else {
- // When transitioning into an IPv6 host, NDP router solicitations are
- // started.
+
+ // As per RFC 4291 section 2.8:
+ //
+ // A router is required to recognize all addresses that a host is
+ // required to recognize, plus the following addresses as identifying
+ // itself:
+ //
+ // o The All-Routers multicast addresses defined in Section 2.7.1.
+ //
+ // As per RFC 4291 section 2.7.1,
+ //
+ // All Routers Addresses: FF01:0:0:0:0:0:0:2
+ // FF02:0:0:0:0:0:0:2
+ // FF05:0:0:0:0:0:0:2
+ //
+ // The above multicast addresses identify the group of all IPv6 routers,
+ // within scope 1 (interface-local), 2 (link-local), or 5 (site-local).
+ for _, g := range allRoutersGroups {
+ if err := e.joinGroupLocked(g); err != nil {
+ // joinGroupLocked only returns an error if the group address is not a
+ // valid IPv6 multicast address.
+ panic(fmt.Sprintf("e.joinGroupLocked(%s): %s", g, err))
+ }
+ }
+
+ return
+ }
+
+ for _, g := range allRoutersGroups {
+ switch err := e.leaveGroupLocked(g).(type) {
+ case nil:
+ case *tcpip.ErrBadLocalAddress:
+ // The endpoint may have already left the multicast group.
+ default:
+ panic(fmt.Sprintf("e.leaveGroupLocked(%s): %s", g, err))
+ }
+ }
+
+ // When transitioning into an IPv6 host, NDP router solicitations are
+ // started if the endpoint is enabled.
+ //
+ // If the endpoint is not currently enabled, routers will be solicited when
+ // the endpoint becomes enabled (if it is still a host).
+ if e.Enabled() {
e.mu.ndp.startSolicitingRouters()
}
}
@@ -573,7 +616,7 @@ func (e *endpoint) disableLocked() {
e.mu.ndp.cleanupState(false /* hostOnly */)
// The endpoint may have already left the multicast group.
- switch err := e.leaveGroupLocked(header.IPv6AllNodesMulticastAddress); err.(type) {
+ switch err := e.leaveGroupLocked(header.IPv6AllNodesMulticastAddress).(type) {
case nil, *tcpip.ErrBadLocalAddress:
default:
panic(fmt.Sprintf("unexpected error when leaving group = %s: %s", header.IPv6AllNodesMulticastAddress, err))
@@ -869,6 +912,16 @@ func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack.PacketBu
// forwardPacket attempts to forward a packet to its final destination.
func (e *endpoint) forwardPacket(pkt *stack.PacketBuffer) tcpip.Error {
h := header.IPv6(pkt.NetworkHeader().View())
+
+ dstAddr := h.DestinationAddress()
+ if header.IsV6LinkLocalAddress(h.SourceAddress()) || header.IsV6LinkLocalAddress(dstAddr) || header.IsV6LinkLocalMulticastAddress(dstAddr) {
+ // As per RFC 4291 section 2.5.6,
+ //
+ // Routers must not forward any packets with Link-Local source or
+ // destination addresses to other links.
+ return nil
+ }
+
hopLimit := h.HopLimit()
if hopLimit <= 1 {
// As per RFC 4443 section 3.3,
@@ -881,8 +934,6 @@ func (e *endpoint) forwardPacket(pkt *stack.PacketBuffer) tcpip.Error {
return e.protocol.returnError(&icmpReasonHopLimitExceeded{}, pkt)
}
- dstAddr := h.DestinationAddress()
-
// Check if the destination is owned by the stack.
if ep := e.protocol.findEndpointWithAddress(dstAddr); ep != nil {
ep.handleValidatedPacket(h, pkt)
@@ -1979,9 +2030,9 @@ func (p *protocol) Forwarding() bool {
// Returns true if the forwarding status was updated.
func (p *protocol) setForwarding(v bool) bool {
if v {
- return atomic.SwapUint32(&p.forwarding, 1) == 0
+ return atomic.CompareAndSwapUint32(&p.forwarding, 0 /* old */, 1 /* new */)
}
- return atomic.SwapUint32(&p.forwarding, 0) == 1
+ return atomic.CompareAndSwapUint32(&p.forwarding, 1 /* old */, 0 /* new */)
}
// SetForwarding implements stack.ForwardingNetworkProtocol.
diff --git a/pkg/tcpip/network/ipv6/mld.go b/pkg/tcpip/network/ipv6/mld.go
index dd153466d..165b7d2d2 100644
--- a/pkg/tcpip/network/ipv6/mld.go
+++ b/pkg/tcpip/network/ipv6/mld.go
@@ -76,10 +76,29 @@ func (mld *mldState) SendReport(groupAddress tcpip.Address) (bool, tcpip.Error)
//
// Precondition: mld.ep.mu must be read locked.
func (mld *mldState) SendLeave(groupAddress tcpip.Address) tcpip.Error {
- _, err := mld.writePacket(header.IPv6AllRoutersMulticastAddress, groupAddress, header.ICMPv6MulticastListenerDone)
+ _, err := mld.writePacket(header.IPv6AllRoutersLinkLocalMulticastAddress, groupAddress, header.ICMPv6MulticastListenerDone)
return err
}
+// ShouldPerformProtocol implements ip.MulticastGroupProtocol.
+//
+// It reports whether MLD messages should be exchanged for groupAddress.
+func (mld *mldState) ShouldPerformProtocol(groupAddress tcpip.Address) bool {
+	// As per RFC 2710 section 5 page 10,
+	//
+	//   The link-scope all-nodes address (FF02::1) is handled as a special
+	//   case. The node starts in Idle Listener state for that address on
+	//   every interface, never transitions to another state, and never sends
+	//   a Report or Done for that address.
+	//
+	//   MLD messages are never sent for multicast addresses whose scope is 0
+	//   (reserved) or 1 (node-local).
+	if groupAddress == header.IPv6AllNodesMulticastAddress {
+		return false
+	}
+
+	// Only the reserved (0) and interface-local (1) scopes are excluded;
+	// every other scope performs MLD.
+	switch header.V6MulticastScope(groupAddress) {
+	case header.IPv6Reserved0MulticastScope, header.IPv6InterfaceLocalMulticastScope:
+		return false
+	default:
+		return true
+	}
+}
+
// init sets up an mldState struct, and is required to be called before using
// a new mldState.
//
@@ -91,7 +110,6 @@ func (mld *mldState) init(ep *endpoint) {
Clock: ep.protocol.stack.Clock(),
Protocol: mld,
MaxUnsolicitedReportDelay: UnsolicitedReportIntervalMax,
- AllNodesAddress: header.IPv6AllNodesMulticastAddress,
})
}
diff --git a/pkg/tcpip/network/ipv6/mld_test.go b/pkg/tcpip/network/ipv6/mld_test.go
index 85a8f9944..146b300f1 100644
--- a/pkg/tcpip/network/ipv6/mld_test.go
+++ b/pkg/tcpip/network/ipv6/mld_test.go
@@ -93,7 +93,7 @@ func TestIPv6JoinLeaveSolicitedNodeAddressPerformsMLD(t *testing.T) {
if p, ok := e.Read(); !ok {
t.Fatal("expected a done message to be sent")
} else {
- validateMLDPacket(t, stack.PayloadSince(p.Pkt.NetworkHeader()), header.IPv6Any, header.IPv6AllRoutersMulticastAddress, header.ICMPv6MulticastListenerDone, linkLocalAddrSNMC)
+ validateMLDPacket(t, stack.PayloadSince(p.Pkt.NetworkHeader()), header.IPv6Any, header.IPv6AllRoutersLinkLocalMulticastAddress, header.ICMPv6MulticastListenerDone, linkLocalAddrSNMC)
}
}
@@ -464,3 +464,141 @@ func TestMLDPacketValidation(t *testing.T) {
})
}
}
+
+// TestMLDSkipProtocol verifies that joining an IPv6 multicast group sends an
+// MLD report for every multicast scope except reserved (0) and
+// interface-local (1), for which no MLD message must be emitted.
+func TestMLDSkipProtocol(t *testing.T) {
+	const nicID = 1
+
+	// One case per 4-bit multicast scope value (second byte of the address).
+	tests := []struct {
+		name         string
+		group        tcpip.Address
+		expectReport bool
+	}{
+		{
+			name:         "Reserved0",
+			group:        "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x11",
+			expectReport: false,
+		},
+		{
+			name:         "Interface Local",
+			group:        "\xff\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x11",
+			expectReport: false,
+		},
+		{
+			name:         "Link Local",
+			group:        "\xff\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x11",
+			expectReport: true,
+		},
+		{
+			name:         "Realm Local",
+			group:        "\xff\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x11",
+			expectReport: true,
+		},
+		{
+			name:         "Admin Local",
+			group:        "\xff\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x11",
+			expectReport: true,
+		},
+		{
+			name:         "Site Local",
+			group:        "\xff\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x11",
+			expectReport: true,
+		},
+		{
+			name:         "Unassigned(6)",
+			group:        "\xff\x06\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x11",
+			expectReport: true,
+		},
+		{
+			name:         "Unassigned(7)",
+			group:        "\xff\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x11",
+			expectReport: true,
+		},
+		{
+			name:         "Organization Local",
+			group:        "\xff\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x11",
+			expectReport: true,
+		},
+		{
+			name:         "Unassigned(9)",
+			group:        "\xff\x09\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x11",
+			expectReport: true,
+		},
+		{
+			name:         "Unassigned(A)",
+			group:        "\xff\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x11",
+			expectReport: true,
+		},
+		{
+			name:         "Unassigned(B)",
+			group:        "\xff\x0b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x11",
+			expectReport: true,
+		},
+		{
+			name:         "Unassigned(C)",
+			group:        "\xff\x0c\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x11",
+			expectReport: true,
+		},
+		{
+			name:         "Unassigned(D)",
+			group:        "\xff\x0d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x11",
+			expectReport: true,
+		},
+		{
+			name:         "Global",
+			group:        "\xff\x0e\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x11",
+			expectReport: true,
+		},
+		{
+			name:         "ReservedF",
+			group:        "\xff\x0f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x11",
+			expectReport: true,
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			s := stack.New(stack.Options{
+				NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+					MLD: ipv6.MLDOptions{
+						Enabled: true,
+					},
+				})},
+			})
+			e := channel.New(1, header.IPv6MinimumMTU, "")
+			if err := s.CreateNIC(nicID, e); err != nil {
+				t.Fatalf("CreateNIC(%d, _): %s", nicID, err)
+			}
+
+			// Adding the link-local address is expected to produce a report
+			// for its solicited-node multicast group; consume it so that the
+			// channel only holds packets caused by the join under test.
+			if err := s.AddAddress(nicID, ipv6.ProtocolNumber, linkLocalAddr); err != nil {
+				t.Fatalf("AddAddress(%d, %d, %s) = %s", nicID, ipv6.ProtocolNumber, linkLocalAddr, err)
+			}
+			if p, ok := e.Read(); !ok {
+				t.Fatal("expected a report message to be sent")
+			} else {
+				validateMLDPacket(t, stack.PayloadSince(p.Pkt.NetworkHeader()), linkLocalAddr, linkLocalAddrSNMC, header.ICMPv6MulticastListenerReport, linkLocalAddrSNMC)
+			}
+
+			// The join itself must succeed regardless of whether MLD is
+			// performed for the group's scope.
+			if err := s.JoinGroup(ipv6.ProtocolNumber, nicID, test.group); err != nil {
+				t.Fatalf("s.JoinGroup(%d, %d, %s): %s", ipv6.ProtocolNumber, nicID, test.group, err)
+			}
+			if isInGroup, err := s.IsInGroup(nicID, test.group); err != nil {
+				t.Fatalf("IsInGroup(%d, %s): %s", nicID, test.group, err)
+			} else if !isInGroup {
+				t.Fatalf("got IsInGroup(%d, %s) = false, want = true", nicID, test.group)
+			}
+
+			if !test.expectReport {
+				// No MLD traffic may be generated for this scope.
+				if p, ok := e.Read(); ok {
+					t.Fatalf("got e.Read() = (%#v, true), want = (_, false)", p)
+				}
+
+				return
+			}
+
+			if p, ok := e.Read(); !ok {
+				t.Fatal("expected a report message to be sent")
+			} else {
+				validateMLDPacket(t, stack.PayloadSince(p.Pkt.NetworkHeader()), linkLocalAddr, test.group, header.ICMPv6MulticastListenerReport, test.group)
+			}
+		})
+	}
+}
diff --git a/pkg/tcpip/network/ipv6/ndp.go b/pkg/tcpip/network/ipv6/ndp.go
index 536493f87..dd7f6a126 100644
--- a/pkg/tcpip/network/ipv6/ndp.go
+++ b/pkg/tcpip/network/ipv6/ndp.go
@@ -1703,7 +1703,7 @@ func (ndp *ndpState) startSolicitingRouters() {
// the unspecified address if no address is assigned
// to the sending interface.
localAddr := header.IPv6Any
- if addressEndpoint := ndp.ep.AcquireOutgoingPrimaryAddress(header.IPv6AllRoutersMulticastAddress, false); addressEndpoint != nil {
+ if addressEndpoint := ndp.ep.AcquireOutgoingPrimaryAddress(header.IPv6AllRoutersLinkLocalMulticastAddress, false); addressEndpoint != nil {
localAddr = addressEndpoint.AddressWithPrefix().Address
addressEndpoint.DecRef()
}
@@ -1730,7 +1730,7 @@ func (ndp *ndpState) startSolicitingRouters() {
icmpData.SetChecksum(header.ICMPv6Checksum(header.ICMPv6ChecksumParams{
Header: icmpData,
Src: localAddr,
- Dst: header.IPv6AllRoutersMulticastAddress,
+ Dst: header.IPv6AllRoutersLinkLocalMulticastAddress,
}))
pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
@@ -1739,14 +1739,14 @@ func (ndp *ndpState) startSolicitingRouters() {
})
sent := ndp.ep.stats.icmp.packetsSent
- if err := addIPHeader(localAddr, header.IPv6AllRoutersMulticastAddress, pkt, stack.NetworkHeaderParams{
+ if err := addIPHeader(localAddr, header.IPv6AllRoutersLinkLocalMulticastAddress, pkt, stack.NetworkHeaderParams{
Protocol: header.ICMPv6ProtocolNumber,
TTL: header.NDPHopLimit,
}, nil /* extensionHeaders */); err != nil {
panic(fmt.Sprintf("failed to add IP header: %s", err))
}
- if err := ndp.ep.nic.WritePacketToRemote(header.EthernetAddressFromMulticastIPv6Address(header.IPv6AllRoutersMulticastAddress), nil /* gso */, ProtocolNumber, pkt); err != nil {
+ if err := ndp.ep.nic.WritePacketToRemote(header.EthernetAddressFromMulticastIPv6Address(header.IPv6AllRoutersLinkLocalMulticastAddress), nil /* gso */, ProtocolNumber, pkt); err != nil {
sent.dropped.Increment()
// Don't send any more messages if we had an error.
remaining = 0
diff --git a/pkg/tcpip/network/multicast_group_test.go b/pkg/tcpip/network/multicast_group_test.go
index ecd5003a7..2aa4e6d75 100644
--- a/pkg/tcpip/network/multicast_group_test.go
+++ b/pkg/tcpip/network/multicast_group_test.go
@@ -194,7 +194,7 @@ func checkInitialIPv6Groups(t *testing.T, e *channel.Endpoint, s *stack.Stack, c
if p, ok := e.Read(); !ok {
t.Fatal("expected a report message to be sent")
} else {
- validateMLDPacket(t, p, header.IPv6AllRoutersMulticastAddress, mldDone, 0, ipv6AddrSNMC)
+ validateMLDPacket(t, p, header.IPv6AllRoutersLinkLocalMulticastAddress, mldDone, 0, ipv6AddrSNMC)
}
// Should not send any more packets.
@@ -606,7 +606,7 @@ func TestMGPLeaveGroup(t *testing.T) {
validateLeave: func(t *testing.T, p channel.PacketInfo) {
t.Helper()
- validateMLDPacket(t, p, header.IPv6AllRoutersMulticastAddress, mldDone, 0, ipv6MulticastAddr1)
+ validateMLDPacket(t, p, header.IPv6AllRoutersLinkLocalMulticastAddress, mldDone, 0, ipv6MulticastAddr1)
},
checkInitialGroups: checkInitialIPv6Groups,
},
@@ -1014,7 +1014,7 @@ func TestMGPWithNICLifecycle(t *testing.T) {
validateLeave: func(t *testing.T, p channel.PacketInfo, addr tcpip.Address) {
t.Helper()
- validateMLDPacket(t, p, header.IPv6AllRoutersMulticastAddress, mldDone, 0, addr)
+ validateMLDPacket(t, p, header.IPv6AllRoutersLinkLocalMulticastAddress, mldDone, 0, addr)
},
getAndCheckGroupAddress: func(t *testing.T, seen map[tcpip.Address]bool, p channel.PacketInfo) tcpip.Address {
t.Helper()
diff --git a/pkg/tcpip/stack/ndp_test.go b/pkg/tcpip/stack/ndp_test.go
index 14124ae66..a869cce38 100644
--- a/pkg/tcpip/stack/ndp_test.go
+++ b/pkg/tcpip/stack/ndp_test.go
@@ -5204,13 +5204,13 @@ func TestRouterSolicitation(t *testing.T) {
}
// Make sure the right remote link address is used.
- if want := header.EthernetAddressFromMulticastIPv6Address(header.IPv6AllRoutersMulticastAddress); p.Route.RemoteLinkAddress != want {
+ if want := header.EthernetAddressFromMulticastIPv6Address(header.IPv6AllRoutersLinkLocalMulticastAddress); p.Route.RemoteLinkAddress != want {
t.Errorf("got remote link address = %s, want = %s", p.Route.RemoteLinkAddress, want)
}
checker.IPv6(t, stack.PayloadSince(p.Pkt.NetworkHeader()),
checker.SrcAddr(test.expectedSrcAddr),
- checker.DstAddr(header.IPv6AllRoutersMulticastAddress),
+ checker.DstAddr(header.IPv6AllRoutersLinkLocalMulticastAddress),
checker.TTL(header.NDPHopLimit),
checker.NDPRS(checker.NDPRSOptions(test.expectedNDPOpts)),
)
@@ -5362,7 +5362,7 @@ func TestStopStartSolicitingRouters(t *testing.T) {
}
checker.IPv6(t, stack.PayloadSince(p.Pkt.NetworkHeader()),
checker.SrcAddr(header.IPv6Any),
- checker.DstAddr(header.IPv6AllRoutersMulticastAddress),
+ checker.DstAddr(header.IPv6AllRoutersLinkLocalMulticastAddress),
checker.TTL(header.NDPHopLimit),
checker.NDPRS())
}
diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go
index 87ea09a5e..60de16579 100644
--- a/pkg/tcpip/tcpip.go
+++ b/pkg/tcpip/tcpip.go
@@ -786,6 +786,13 @@ func (*TCPRecovery) isGettableTransportProtocolOption() {}
func (*TCPRecovery) isSettableTransportProtocolOption() {}
+// TCPAlwaysUseSynCookies indicates unconditional usage of syncookies.
+//
+// It is a boolean transport protocol option: it can be both read and written
+// through the stack's transport protocol option accessors.
+type TCPAlwaysUseSynCookies bool
+
+// isGettableTransportProtocolOption marks TCPAlwaysUseSynCookies as an option
+// that may be queried as a transport protocol option.
+func (*TCPAlwaysUseSynCookies) isGettableTransportProtocolOption() {}
+
+// isSettableTransportProtocolOption marks TCPAlwaysUseSynCookies as an option
+// that may be set as a transport protocol option.
+func (*TCPAlwaysUseSynCookies) isSettableTransportProtocolOption() {}
+
const (
// TCPRACKLossDetection indicates RACK is used for loss detection and
// recovery.
@@ -1020,19 +1027,6 @@ func (*TCPMaxRetriesOption) isGettableTransportProtocolOption() {}
func (*TCPMaxRetriesOption) isSettableTransportProtocolOption() {}
-// TCPSynRcvdCountThresholdOption is used by SetSockOpt/GetSockOpt to specify
-// the number of endpoints that can be in SYN-RCVD state before the stack
-// switches to using SYN cookies.
-type TCPSynRcvdCountThresholdOption uint64
-
-func (*TCPSynRcvdCountThresholdOption) isGettableSocketOption() {}
-
-func (*TCPSynRcvdCountThresholdOption) isSettableSocketOption() {}
-
-func (*TCPSynRcvdCountThresholdOption) isGettableTransportProtocolOption() {}
-
-func (*TCPSynRcvdCountThresholdOption) isSettableTransportProtocolOption() {}
-
// TCPSynRetriesOption is used by SetSockOpt/GetSockOpt to specify stack-wide
// default for number of times SYN is retransmitted before aborting a connect.
type TCPSynRetriesOption uint8
diff --git a/pkg/tcpip/tests/integration/BUILD b/pkg/tcpip/tests/integration/BUILD
index 3cc8c36f1..3b51e4be0 100644
--- a/pkg/tcpip/tests/integration/BUILD
+++ b/pkg/tcpip/tests/integration/BUILD
@@ -9,6 +9,8 @@ go_test(
deps = [
"//pkg/tcpip",
"//pkg/tcpip/checker",
+ "//pkg/tcpip/header",
+ "//pkg/tcpip/link/channel",
"//pkg/tcpip/network/arp",
"//pkg/tcpip/network/ipv4",
"//pkg/tcpip/network/ipv6",
diff --git a/pkg/tcpip/tests/integration/forward_test.go b/pkg/tcpip/tests/integration/forward_test.go
index d10ae05c2..0de5079e8 100644
--- a/pkg/tcpip/tests/integration/forward_test.go
+++ b/pkg/tcpip/tests/integration/forward_test.go
@@ -21,6 +21,8 @@ import (
"github.com/google/go-cmp/cmp"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/checker"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/link/channel"
"gvisor.dev/gvisor/pkg/tcpip/network/arp"
"gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
"gvisor.dev/gvisor/pkg/tcpip/network/ipv6"
@@ -312,3 +314,193 @@ func TestForwarding(t *testing.T) {
})
}
}
+
+// TestMulticastForwarding injects ICMP echo requests on one NIC of a
+// forwarding-enabled stack and checks whether they are forwarded out of a
+// second NIC. Packets with a link-local source or destination (unicast or
+// multicast) must not be forwarded; all other packets must be forwarded with
+// the TTL/hop limit decremented by one.
+func TestMulticastForwarding(t *testing.T) {
+	const (
+		nicID1 = 1
+		nicID2 = 2
+
+		ipv4LinkLocalUnicastAddr   = tcpip.Address("\xa9\xfe\x00\x0a")
+		ipv4LinkLocalMulticastAddr = tcpip.Address("\xe0\x00\x00\x0a")
+		ipv4GlobalMulticastAddr    = tcpip.Address("\xe0\x00\x01\x0a")
+
+		ipv6LinkLocalUnicastAddr   = tcpip.Address("\xfe\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0a")
+		ipv6LinkLocalMulticastAddr = tcpip.Address("\xff\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0a")
+		ipv6GlobalMulticastAddr    = tcpip.Address("\xff\x0e\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0a")
+
+		ttl = 64
+	)
+
+	// Injectors deliver an echo request with the test TTL to an endpoint.
+	injectV4 := func(ep *channel.Endpoint, src, dst tcpip.Address) {
+		utils.RxICMPv4EchoRequest(ep, src, dst, ttl)
+	}
+	injectV6 := func(ep *channel.Endpoint, src, dst tcpip.Address) {
+		utils.RxICMPv6EchoRequest(ep, src, dst, ttl)
+	}
+
+	// Validators check that a forwarded packet kept its addresses and had
+	// its TTL/hop limit decremented exactly once.
+	validateV4 := func(t *testing.T, raw []byte, src, dst tcpip.Address) {
+		checker.IPv4(t, raw,
+			checker.SrcAddr(src),
+			checker.DstAddr(dst),
+			checker.TTL(ttl-1),
+			checker.ICMPv4(
+				checker.ICMPv4Type(header.ICMPv4Echo)))
+	}
+	validateV6 := func(t *testing.T, raw []byte, src, dst tcpip.Address) {
+		checker.IPv6(t, raw,
+			checker.SrcAddr(src),
+			checker.DstAddr(dst),
+			checker.TTL(ttl-1),
+			checker.ICMPv6(
+				checker.ICMPv6Type(header.ICMPv6EchoRequest)))
+	}
+
+	// validate is only set (and only invoked) for cases that expect the
+	// packet to be forwarded; it receives the case's own srcAddr/dstAddr.
+	testCases := []struct {
+		name             string
+		srcAddr, dstAddr tcpip.Address
+		inject           func(*channel.Endpoint, tcpip.Address, tcpip.Address)
+		expectForward    bool
+		validate         func(*testing.T, []byte, tcpip.Address, tcpip.Address)
+	}{
+		{
+			name:          "IPv4 link-local multicast destination",
+			srcAddr:       utils.RemoteIPv4Addr,
+			dstAddr:       ipv4LinkLocalMulticastAddr,
+			inject:        injectV4,
+			expectForward: false,
+		},
+		{
+			name:          "IPv4 link-local source",
+			srcAddr:       ipv4LinkLocalUnicastAddr,
+			dstAddr:       utils.RemoteIPv4Addr,
+			inject:        injectV4,
+			expectForward: false,
+		},
+		{
+			name:          "IPv4 link-local destination",
+			srcAddr:       utils.RemoteIPv4Addr,
+			dstAddr:       ipv4LinkLocalUnicastAddr,
+			inject:        injectV4,
+			expectForward: false,
+		},
+		{
+			name:          "IPv4 non-link-local unicast",
+			srcAddr:       utils.RemoteIPv4Addr,
+			dstAddr:       utils.Ipv4Addr2.AddressWithPrefix.Address,
+			inject:        injectV4,
+			expectForward: true,
+			validate:      validateV4,
+		},
+		{
+			name:          "IPv4 non-link-local multicast",
+			srcAddr:       utils.RemoteIPv4Addr,
+			dstAddr:       ipv4GlobalMulticastAddr,
+			inject:        injectV4,
+			expectForward: true,
+			validate:      validateV4,
+		},
+
+		{
+			name:          "IPv6 link-local multicast destination",
+			srcAddr:       utils.RemoteIPv6Addr,
+			dstAddr:       ipv6LinkLocalMulticastAddr,
+			inject:        injectV6,
+			expectForward: false,
+		},
+		{
+			name:          "IPv6 link-local source",
+			srcAddr:       ipv6LinkLocalUnicastAddr,
+			dstAddr:       utils.RemoteIPv6Addr,
+			inject:        injectV6,
+			expectForward: false,
+		},
+		{
+			name:          "IPv6 link-local destination",
+			srcAddr:       utils.RemoteIPv6Addr,
+			dstAddr:       ipv6LinkLocalUnicastAddr,
+			inject:        injectV6,
+			expectForward: false,
+		},
+		{
+			name:          "IPv6 non-link-local unicast",
+			srcAddr:       utils.RemoteIPv6Addr,
+			dstAddr:       utils.Ipv6Addr2.AddressWithPrefix.Address,
+			inject:        injectV6,
+			expectForward: true,
+			validate:      validateV6,
+		},
+		{
+			name:          "IPv6 non-link-local multicast",
+			srcAddr:       utils.RemoteIPv6Addr,
+			dstAddr:       ipv6GlobalMulticastAddr,
+			inject:        injectV6,
+			expectForward: true,
+			validate:      validateV6,
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			s := stack.New(stack.Options{
+				NetworkProtocols:   []stack.NetworkProtocolFactory{ipv4.NewProtocol, ipv6.NewProtocol},
+				TransportProtocols: []stack.TransportProtocolFactory{udp.NewProtocol},
+			})
+
+			// NIC 1 receives the injected packet; NIC 2 is where forwarded
+			// packets are expected to leave.
+			ingress := channel.New(1, header.IPv6MinimumMTU, "")
+			if err := s.CreateNIC(nicID1, ingress); err != nil {
+				t.Fatalf("s.CreateNIC(%d, _): %s", nicID1, err)
+			}
+
+			egress := channel.New(1, header.IPv6MinimumMTU, "")
+			if err := s.CreateNIC(nicID2, egress); err != nil {
+				t.Fatalf("s.CreateNIC(%d, _): %s", nicID2, err)
+			}
+
+			// Assign one address per protocol to the egress NIC.
+			egressAddrs := []struct {
+				proto tcpip.NetworkProtocolNumber
+				addr  tcpip.Address
+			}{
+				{proto: ipv4.ProtocolNumber, addr: utils.Ipv4Addr.Address},
+				{proto: ipv6.ProtocolNumber, addr: utils.Ipv6Addr.Address},
+			}
+			for _, a := range egressAddrs {
+				if err := s.AddAddress(nicID2, a.proto, a.addr); err != nil {
+					t.Fatalf("s.AddAddress(%d, %d, %s): %s", nicID2, a.proto, a.addr, err)
+				}
+			}
+
+			// Enable forwarding for both network protocols.
+			for _, proto := range []tcpip.NetworkProtocolNumber{ipv4.ProtocolNumber, ipv6.ProtocolNumber} {
+				if err := s.SetForwarding(proto, true); err != nil {
+					t.Fatalf("s.SetForwarding(%d, true): %s", proto, err)
+				}
+			}
+
+			// Route everything out of the egress NIC.
+			s.SetRouteTable([]tcpip.Route{
+				{
+					Destination: header.IPv4EmptySubnet,
+					NIC:         nicID2,
+				},
+				{
+					Destination: header.IPv6EmptySubnet,
+					NIC:         nicID2,
+				},
+			})
+
+			tc.inject(ingress, tc.srcAddr, tc.dstAddr)
+
+			pkt, forwarded := egress.Read()
+			if forwarded != tc.expectForward {
+				t.Fatalf("got e2.Read() = (%#v, %t), want = (_, %t)", pkt, forwarded, tc.expectForward)
+			}
+
+			// Nothing further to verify when the packet was correctly dropped.
+			if !tc.expectForward {
+				return
+			}
+
+			tc.validate(t, stack.PayloadSince(pkt.Pkt.NetworkHeader()), tc.srcAddr, tc.dstAddr)
+		})
+	}
+}
diff --git a/pkg/tcpip/tests/integration/loopback_test.go b/pkg/tcpip/tests/integration/loopback_test.go
index 2c538a43e..82c2e11ab 100644
--- a/pkg/tcpip/tests/integration/loopback_test.go
+++ b/pkg/tcpip/tests/integration/loopback_test.go
@@ -513,22 +513,23 @@ func TestExternalLoopbackTraffic(t *testing.T) {
ipv4Loopback = tcpip.Address("\x7f\x00\x00\x01")
numPackets = 1
+ ttl = 64
)
loopbackSourcedICMPv4 := func(e *channel.Endpoint) {
- utils.RxICMPv4EchoRequest(e, ipv4Loopback, utils.Ipv4Addr.Address)
+ utils.RxICMPv4EchoRequest(e, ipv4Loopback, utils.Ipv4Addr.Address, ttl)
}
loopbackSourcedICMPv6 := func(e *channel.Endpoint) {
- utils.RxICMPv6EchoRequest(e, header.IPv6Loopback, utils.Ipv6Addr.Address)
+ utils.RxICMPv6EchoRequest(e, header.IPv6Loopback, utils.Ipv6Addr.Address, ttl)
}
loopbackDestinedICMPv4 := func(e *channel.Endpoint) {
- utils.RxICMPv4EchoRequest(e, utils.RemoteIPv4Addr, ipv4Loopback)
+ utils.RxICMPv4EchoRequest(e, utils.RemoteIPv4Addr, ipv4Loopback, ttl)
}
loopbackDestinedICMPv6 := func(e *channel.Endpoint) {
- utils.RxICMPv6EchoRequest(e, utils.RemoteIPv6Addr, header.IPv6Loopback)
+ utils.RxICMPv6EchoRequest(e, utils.RemoteIPv6Addr, header.IPv6Loopback, ttl)
}
invalidSrcAddrStat := func(s tcpip.IPStats) *tcpip.StatCounter {
diff --git a/pkg/tcpip/tests/integration/multicast_broadcast_test.go b/pkg/tcpip/tests/integration/multicast_broadcast_test.go
index c6a9c2393..09ff3b892 100644
--- a/pkg/tcpip/tests/integration/multicast_broadcast_test.go
+++ b/pkg/tcpip/tests/integration/multicast_broadcast_test.go
@@ -43,12 +43,15 @@ const (
// to a multicast or broadcast address uses a unicast source address for the
// reply.
func TestPingMulticastBroadcast(t *testing.T) {
- const nicID = 1
+ const (
+ nicID = 1
+ ttl = 64
+ )
tests := []struct {
name string
protoNum tcpip.NetworkProtocolNumber
- rxICMP func(*channel.Endpoint, tcpip.Address, tcpip.Address)
+ rxICMP func(*channel.Endpoint, tcpip.Address, tcpip.Address, uint8)
srcAddr tcpip.Address
dstAddr tcpip.Address
expectedSrc tcpip.Address
@@ -136,7 +139,7 @@ func TestPingMulticastBroadcast(t *testing.T) {
},
})
- test.rxICMP(e, test.srcAddr, test.dstAddr)
+ test.rxICMP(e, test.srcAddr, test.dstAddr, ttl)
pkt, ok := e.Read()
if !ok {
t.Fatal("expected ICMP response")
diff --git a/pkg/tcpip/tests/utils/utils.go b/pkg/tcpip/tests/utils/utils.go
index d1c9f3a94..8fd9be32b 100644
--- a/pkg/tcpip/tests/utils/utils.go
+++ b/pkg/tcpip/tests/utils/utils.go
@@ -48,10 +48,6 @@ const (
LinkAddr4 = tcpip.LinkAddress("\x02\x03\x03\x04\x05\x09")
)
-const (
- ttl = 255
-)
-
// Common IP addresses used by tests.
var (
Ipv4Addr = tcpip.AddressWithPrefix{
@@ -322,7 +318,7 @@ func SetupRoutedStacks(t *testing.T, host1Stack, routerStack, host2Stack *stack.
// RxICMPv4EchoRequest constructs and injects an ICMPv4 echo request packet on
// the provided endpoint.
-func RxICMPv4EchoRequest(e *channel.Endpoint, src, dst tcpip.Address) {
+func RxICMPv4EchoRequest(e *channel.Endpoint, src, dst tcpip.Address, ttl uint8) {
totalLen := header.IPv4MinimumSize + header.ICMPv4MinimumSize
hdr := buffer.NewPrependable(totalLen)
pkt := header.ICMPv4(hdr.Prepend(header.ICMPv4MinimumSize))
@@ -347,7 +343,7 @@ func RxICMPv4EchoRequest(e *channel.Endpoint, src, dst tcpip.Address) {
// RxICMPv6EchoRequest constructs and injects an ICMPv6 echo request packet on
// the provided endpoint.
-func RxICMPv6EchoRequest(e *channel.Endpoint, src, dst tcpip.Address) {
+func RxICMPv6EchoRequest(e *channel.Endpoint, src, dst tcpip.Address, ttl uint8) {
totalLen := header.IPv6MinimumSize + header.ICMPv6MinimumSize
hdr := buffer.NewPrependable(totalLen)
pkt := header.ICMPv6(hdr.Prepend(header.ICMPv6MinimumSize))
diff --git a/pkg/tcpip/transport/tcp/accept.go b/pkg/tcpip/transport/tcp/accept.go
index 025b134e2..7372ebc08 100644
--- a/pkg/tcpip/transport/tcp/accept.go
+++ b/pkg/tcpip/transport/tcp/accept.go
@@ -51,11 +51,6 @@ const (
// timestamp and the current timestamp. If the difference is greater
// than maxTSDiff, the cookie is expired.
maxTSDiff = 2
-
- // SynRcvdCountThreshold is the default global maximum number of
- // connections that are allowed to be in SYN-RCVD state before TCP
- // starts using SYN cookies to accept connections.
- SynRcvdCountThreshold uint64 = 1000
)
var (
@@ -80,9 +75,6 @@ func encodeMSS(mss uint16) uint32 {
type listenContext struct {
stack *stack.Stack
- // synRcvdCount is a reference to the stack level synRcvdCount.
- synRcvdCount *synRcvdCounter
-
// rcvWnd is the receive window that is sent by this listening context
// in the initial SYN-ACK.
rcvWnd seqnum.Size
@@ -138,11 +130,6 @@ func newListenContext(stk *stack.Stack, listenEP *endpoint, rcvWnd seqnum.Size,
listenEP: listenEP,
pendingEndpoints: make(map[stack.TransportEndpointID]*endpoint),
}
- p, ok := stk.TransportProtocolInstance(ProtocolNumber).(*protocol)
- if !ok {
- panic(fmt.Sprintf("unable to get TCP protocol instance from stack: %+v", stk))
- }
- l.synRcvdCount = p.SynRcvdCounter()
rand.Read(l.nonce[0][:])
rand.Read(l.nonce[1][:])
@@ -199,6 +186,14 @@ func (l *listenContext) isCookieValid(id stack.TransportEndpointID, cookie seqnu
return (v - l.cookieHash(id, cookieTS, 1)) & hashMask, true
}
+// useSynCookies reports whether an incoming SYN should be answered using SYN
+// cookies: either because the TCPAlwaysUseSynCookies protocol option is set,
+// or because the listening endpoint (if any) has a full SYN-RCVD backlog.
+//
+// It panics if the protocol option cannot be read from the stack.
+func (l *listenContext) useSynCookies() bool {
+	var always tcpip.TCPAlwaysUseSynCookies
+	err := l.stack.TransportProtocolOption(header.TCPProtocolNumber, &always)
+	if err != nil {
+		panic(fmt.Sprintf("TransportProtocolOption(%d, %T) = %s", header.TCPProtocolNumber, always, err))
+	}
+
+	if bool(always) {
+		return true
+	}
+	// Without a listening endpoint there is no backlog to consult.
+	if l.listenEP == nil {
+		return false
+	}
+	return l.listenEP.synRcvdBacklogFull()
+}
+
// createConnectingEndpoint creates a new endpoint in a connecting state, with
// the connection parameters given by the arguments.
func (l *listenContext) createConnectingEndpoint(s *segment, iss seqnum.Value, irs seqnum.Value, rcvdSynOpts *header.TCPSynOptions, queue *waiter.Queue) (*endpoint, tcpip.Error) {
@@ -307,6 +302,7 @@ func (l *listenContext) startHandshake(s *segment, opts *header.TCPSynOptions, q
// Initialize and start the handshake.
h := ep.newPassiveHandshake(isn, irs, opts, deferAccept)
+ h.listenEP = l.listenEP
h.start()
return h, nil
}
@@ -485,7 +481,6 @@ func (e *endpoint) handleSynSegment(ctx *listenContext, s *segment, opts *header
}
go func() {
- defer ctx.synRcvdCount.dec()
if err := h.complete(); err != nil {
e.stack.Stats().TCP.FailedConnectionAttempts.Increment()
e.stats.FailedConnectionAttempts.Increment()
@@ -497,24 +492,29 @@ func (e *endpoint) handleSynSegment(ctx *listenContext, s *segment, opts *header
h.ep.startAcceptedLoop()
e.stack.Stats().TCP.PassiveConnectionOpenings.Increment()
e.deliverAccepted(h.ep, false /*withSynCookie*/)
- }() // S/R-SAFE: synRcvdCount is the barrier.
+ }()
return nil
}
-func (e *endpoint) incSynRcvdCount() bool {
+func (e *endpoint) synRcvdBacklogFull() bool {
e.acceptMu.Lock()
- canInc := int(atomic.LoadInt32(&e.synRcvdCount)) < cap(e.acceptedChan)
+ acceptedChanCap := cap(e.acceptedChan)
e.acceptMu.Unlock()
- if canInc {
- atomic.AddInt32(&e.synRcvdCount, 1)
- }
- return canInc
+ // The allocated accepted channel size would always be one greater than the
+ // listen backlog. But, the SYNRCVD connections count is always checked
+ // against the listen backlog value for Linux parity reason.
+ // https://github.com/torvalds/linux/blob/7acac4b3196/include/net/inet_connection_sock.h#L280
+ //
+ // We maintain an equality check here as the synRcvdCount is incremented
+ // and compared only from a single listener context and the capacity of
+ // the accepted channel can only increase by a new listen call.
+ return int(atomic.LoadInt32(&e.synRcvdCount)) == acceptedChanCap-1
}
func (e *endpoint) acceptQueueIsFull() bool {
e.acceptMu.Lock()
- full := len(e.acceptedChan)+int(atomic.LoadInt32(&e.synRcvdCount)) >= cap(e.acceptedChan)
+ full := len(e.acceptedChan) == cap(e.acceptedChan)
e.acceptMu.Unlock()
return full
}
@@ -539,17 +539,13 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) tcpip.Err
switch {
case s.flags == header.TCPFlagSyn:
opts := parseSynSegmentOptions(s)
- if ctx.synRcvdCount.inc() {
- // Only handle the syn if the following conditions hold
- // - accept queue is not full.
- // - number of connections in synRcvd state is less than the
- // backlog.
- if !e.acceptQueueIsFull() && e.incSynRcvdCount() {
+ if !ctx.useSynCookies() {
+ if !e.acceptQueueIsFull() {
s.incRef()
+ atomic.AddInt32(&e.synRcvdCount, 1)
_ = e.handleSynSegment(ctx, s, &opts)
return nil
}
- ctx.synRcvdCount.dec()
e.stack.Stats().TCP.ListenOverflowSynDrop.Increment()
e.stats.ReceiveErrors.ListenOverflowSynDrop.Increment()
e.stack.Stats().DroppedPackets.Increment()
@@ -615,25 +611,6 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) tcpip.Err
return nil
}
- if !ctx.synRcvdCount.synCookiesInUse() {
- // When not using SYN cookies, as per RFC 793, section 3.9, page 64:
- // Any acknowledgment is bad if it arrives on a connection still in
- // the LISTEN state. An acceptable reset segment should be formed
- // for any arriving ACK-bearing segment. The RST should be
- // formatted as follows:
- //
- // <SEQ=SEG.ACK><CTL=RST>
- //
- // Send a reset as this is an ACK for which there is no
- // half open connections and we are not using cookies
- // yet.
- //
- // The only time we should reach here when a connection
- // was opened and closed really quickly and a delayed
- // ACK was received from the sender.
- return replyWithReset(e.stack, s, e.sendTOS, e.ttl)
- }
-
iss := s.ackNumber - 1
irs := s.sequenceNumber - 1
@@ -651,7 +628,23 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) tcpip.Err
if !ok || int(data) >= len(mssTable) {
e.stack.Stats().TCP.ListenOverflowInvalidSynCookieRcvd.Increment()
e.stack.Stats().DroppedPackets.Increment()
- return nil
+
+ // When not using SYN cookies, as per RFC 793, section 3.9, page 64:
+ // Any acknowledgment is bad if it arrives on a connection still in
+ // the LISTEN state. An acceptable reset segment should be formed
+ // for any arriving ACK-bearing segment. The RST should be
+ // formatted as follows:
+ //
+ // <SEQ=SEG.ACK><CTL=RST>
+ //
+ // Send a reset as this is an ACK for which there is no
+ // half open connections and we are not using cookies
+ // yet.
+ //
+ // The only time we should reach here when a connection
+ // was opened and closed really quickly and a delayed
+ // ACK was received from the sender.
+ return replyWithReset(e.stack, s, e.sendTOS, e.ttl)
}
e.stack.Stats().TCP.ListenOverflowSynCookieRcvd.Increment()
// Create newly accepted endpoint and deliver it.
diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go
index a9e978cf6..8f0f0c3e9 100644
--- a/pkg/tcpip/transport/tcp/connect.go
+++ b/pkg/tcpip/transport/tcp/connect.go
@@ -65,11 +65,12 @@ const (
// NOTE: handshake.ep.mu is held during handshake processing. It is released if
// we are going to block and reacquired when we start processing an event.
type handshake struct {
- ep *endpoint
- state handshakeState
- active bool
- flags header.TCPFlags
- ackNum seqnum.Value
+ ep *endpoint
+ listenEP *endpoint
+ state handshakeState
+ active bool
+ flags header.TCPFlags
+ ackNum seqnum.Value
// iss is the initial send sequence number, as defined in RFC 793.
iss seqnum.Value
@@ -394,6 +395,15 @@ func (h *handshake) synRcvdState(s *segment) tcpip.Error {
return nil
}
+ // Drop the ACK if the accept queue is full.
+ // https://github.com/torvalds/linux/blob/7acac4b3196/net/ipv4/tcp_ipv4.c#L1523
+ // We could abort the connection as well with a tunable as in
+ // https://github.com/torvalds/linux/blob/7acac4b3196/net/ipv4/tcp_minisocks.c#L788
+ if listenEP := h.listenEP; listenEP != nil && listenEP.acceptQueueIsFull() {
+ listenEP.stack.Stats().DroppedPackets.Increment()
+ return nil
+ }
+
// Update timestamp if required. See RFC7323, section-4.3.
if h.ep.sendTSOk && s.parsedOptions.TS {
h.ep.updateRecentTimestamp(s.parsedOptions.TSVal, h.ackNum, s.sequenceNumber)
diff --git a/pkg/tcpip/transport/tcp/dual_stack_test.go b/pkg/tcpip/transport/tcp/dual_stack_test.go
index f6a16f96e..d6d68f128 100644
--- a/pkg/tcpip/transport/tcp/dual_stack_test.go
+++ b/pkg/tcpip/transport/tcp/dual_stack_test.go
@@ -565,17 +565,15 @@ func TestV4AcceptOnV4(t *testing.T) {
}
func testV4ListenClose(t *testing.T, c *context.Context) {
- // Set the SynRcvd threshold to zero to force a syn cookie based accept
- // to happen.
- var opt tcpip.TCPSynRcvdCountThresholdOption
+ opt := tcpip.TCPAlwaysUseSynCookies(true)
if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
- t.Fatalf("setting TCPSynRcvdCountThresholdOption(%d, &%T(%d)): %s", tcp.ProtocolNumber, opt, opt, err)
+ t.Fatalf("SetTransportProtocolOption(%d, &%T(%t)): %s", tcp.ProtocolNumber, opt, opt, err)
}
- const n = uint16(32)
+ const n = 32
// Start listening.
- if err := c.EP.Listen(int(tcp.SynRcvdCountThreshold + 1)); err != nil {
+ if err := c.EP.Listen(n); err != nil {
t.Fatalf("Listen failed: %v", err)
}
@@ -591,9 +589,9 @@ func testV4ListenClose(t *testing.T, c *context.Context) {
})
}
- // Each of these ACK's will cause a syn-cookie based connection to be
+ // Each of these ACKs will cause a syn-cookie based connection to be
// accepted and delivered to the listening endpoint.
- for i := uint16(0); i < n; i++ {
+ for i := 0; i < n; i++ {
b := c.GetPacket()
tcp := header.TCP(header.IPv4(b).Payload())
iss := seqnum.Value(tcp.SequenceNumber())
diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go
index c5daba232..9438056f9 100644
--- a/pkg/tcpip/transport/tcp/endpoint.go
+++ b/pkg/tcpip/transport/tcp/endpoint.go
@@ -2474,6 +2474,20 @@ func (e *endpoint) shutdownLocked(flags tcpip.ShutdownFlags) tcpip.Error {
// Listen puts the endpoint in "listen" mode, which allows it to accept
// new connections.
func (e *endpoint) Listen(backlog int) tcpip.Error {
+ if uint32(backlog) > MaxListenBacklog {
+ // Linux treats incoming backlog as uint with a limit defined by
+ // sysctl_somaxconn.
+ // https://github.com/torvalds/linux/blob/7acac4b3196/net/socket.c#L1666
+ //
+ // We use the backlog to allocate a channel of that size, hence enforce
+ // a hard limit for the backlog.
+ backlog = MaxListenBacklog
+ } else {
+ // Accept one more than the configured listen backlog to stay in parity with
+ // Linux, which does so because of a missing equality check here:
+ // https://github.com/torvalds/linux/blob/7acac4b3196/include/net/sock.h#L937
+ backlog++
+ }
err := e.listen(backlog)
if err != nil {
if !err.IgnoreStats() {
diff --git a/pkg/tcpip/transport/tcp/protocol.go b/pkg/tcpip/transport/tcp/protocol.go
index 2a4667906..230fa6ebe 100644
--- a/pkg/tcpip/transport/tcp/protocol.go
+++ b/pkg/tcpip/transport/tcp/protocol.go
@@ -68,6 +68,9 @@ const (
// DefaultSynRetries is the default value for the number of SYN retransmits
// before a connect is aborted.
DefaultSynRetries = 6
+
+ // MaxListenBacklog is the maximum limit of listen backlog supported.
+ MaxListenBacklog = 1024
)
const (
@@ -75,63 +78,6 @@ const (
ccCubic = "cubic"
)
-// syncRcvdCounter tracks the number of endpoints in the SYN-RCVD state. The
-// value is protected by a mutex so that we can increment only when it's
-// guaranteed not to go above a threshold.
-type synRcvdCounter struct {
- sync.Mutex
- value uint64
- pending sync.WaitGroup
- threshold uint64
-}
-
-// inc tries to increment the global number of endpoints in SYN-RCVD state. It
-// succeeds if the increment doesn't make the count go beyond the threshold, and
-// fails otherwise.
-func (s *synRcvdCounter) inc() bool {
- s.Lock()
- defer s.Unlock()
- if s.value >= s.threshold {
- return false
- }
-
- s.pending.Add(1)
- s.value++
-
- return true
-}
-
-// dec atomically decrements the global number of endpoints in SYN-RCVD
-// state. It must only be called if a previous call to inc succeeded.
-func (s *synRcvdCounter) dec() {
- s.Lock()
- defer s.Unlock()
- s.value--
- s.pending.Done()
-}
-
-// synCookiesInUse returns true if the synRcvdCount is greater than
-// SynRcvdCountThreshold.
-func (s *synRcvdCounter) synCookiesInUse() bool {
- s.Lock()
- defer s.Unlock()
- return s.value >= s.threshold
-}
-
-// SetThreshold sets synRcvdCounter.Threshold to ths new threshold.
-func (s *synRcvdCounter) SetThreshold(threshold uint64) {
- s.Lock()
- defer s.Unlock()
- s.threshold = threshold
-}
-
-// Threshold returns the current value of synRcvdCounter.Threhsold.
-func (s *synRcvdCounter) Threshold() uint64 {
- s.Lock()
- defer s.Unlock()
- return s.threshold
-}
-
type protocol struct {
stack *stack.Stack
@@ -139,6 +85,7 @@ type protocol struct {
sackEnabled bool
recovery tcpip.TCPRecovery
delayEnabled bool
+ alwaysUseSynCookies bool
sendBufferSize tcpip.TCPSendBufferSizeRangeOption
recvBufferSize tcpip.TCPReceiveBufferSizeRangeOption
congestionControl string
@@ -150,7 +97,6 @@ type protocol struct {
minRTO time.Duration
maxRTO time.Duration
maxRetries uint32
- synRcvdCount synRcvdCounter
synRetries uint8
dispatcher dispatcher
}
@@ -373,9 +319,9 @@ func (p *protocol) SetOption(option tcpip.SettableTransportProtocolOption) tcpip
p.mu.Unlock()
return nil
- case *tcpip.TCPSynRcvdCountThresholdOption:
+ case *tcpip.TCPAlwaysUseSynCookies:
p.mu.Lock()
- p.synRcvdCount.SetThreshold(uint64(*v))
+ p.alwaysUseSynCookies = bool(*v)
p.mu.Unlock()
return nil
@@ -480,9 +426,9 @@ func (p *protocol) Option(option tcpip.GettableTransportProtocolOption) tcpip.Er
p.mu.RUnlock()
return nil
- case *tcpip.TCPSynRcvdCountThresholdOption:
+ case *tcpip.TCPAlwaysUseSynCookies:
p.mu.RLock()
- *v = tcpip.TCPSynRcvdCountThresholdOption(p.synRcvdCount.Threshold())
+ *v = tcpip.TCPAlwaysUseSynCookies(p.alwaysUseSynCookies)
p.mu.RUnlock()
return nil
@@ -507,12 +453,6 @@ func (p *protocol) Wait() {
p.dispatcher.wait()
}
-// SynRcvdCounter returns a reference to the synRcvdCount for this protocol
-// instance.
-func (p *protocol) SynRcvdCounter() *synRcvdCounter {
- return &p.synRcvdCount
-}
-
// Parse implements stack.TransportProtocol.Parse.
func (*protocol) Parse(pkt *stack.PacketBuffer) bool {
return parse.TCP(pkt)
@@ -537,7 +477,6 @@ func NewProtocol(s *stack.Stack) stack.TransportProtocol {
lingerTimeout: DefaultTCPLingerTimeout,
timeWaitTimeout: DefaultTCPTimeWaitTimeout,
timeWaitReuse: tcpip.TCPTimeWaitReuseLoopbackOnly,
- synRcvdCount: synRcvdCounter{threshold: SynRcvdCountThreshold},
synRetries: DefaultSynRetries,
minRTO: MinRTO,
maxRTO: MaxRTO,
diff --git a/pkg/tcpip/transport/tcp/tcp_sack_test.go b/pkg/tcpip/transport/tcp/tcp_sack_test.go
index 81f800cad..20c9761f2 100644
--- a/pkg/tcpip/transport/tcp/tcp_sack_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_sack_test.go
@@ -160,12 +160,9 @@ func TestSackPermittedAccept(t *testing.T) {
defer c.Cleanup()
if tc.cookieEnabled {
- // Set the SynRcvd threshold to
- // zero to force a syn cookie
- // based accept to happen.
- var opt tcpip.TCPSynRcvdCountThresholdOption
+ opt := tcpip.TCPAlwaysUseSynCookies(true)
if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
- t.Fatalf("SetTransportProtocolOption(%d, &%T(%d)): %s", tcp.ProtocolNumber, opt, opt, err)
+ t.Fatalf("SetTransportProtocolOption(%d, &%T(%t)): %s", tcp.ProtocolNumber, opt, opt, err)
}
}
setStackSACKPermitted(t, c, sackEnabled)
@@ -235,12 +232,9 @@ func TestSackDisabledAccept(t *testing.T) {
defer c.Cleanup()
if tc.cookieEnabled {
- // Set the SynRcvd threshold to
- // zero to force a syn cookie
- // based accept to happen.
- var opt tcpip.TCPSynRcvdCountThresholdOption
+ opt := tcpip.TCPAlwaysUseSynCookies(true)
if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
- t.Fatalf("SetTransportProtocolOption(%d, &%T(%d)): %s", tcp.ProtocolNumber, opt, opt, err)
+ t.Fatalf("SetTransportProtocolOption(%d, &%T(%t)): %s", tcp.ProtocolNumber, opt, opt, err)
}
}
diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go
index 9c23469f2..5605a4390 100644
--- a/pkg/tcpip/transport/tcp/tcp_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_test.go
@@ -955,11 +955,7 @@ func TestUserSuppliedMSSOnConnect(t *testing.T) {
// when completing the handshake for a new TCP connection from a TCP
// listening socket. It should be present in the sent TCP SYN-ACK segment.
func TestUserSuppliedMSSOnListenAccept(t *testing.T) {
- const (
- nonSynCookieAccepts = 2
- totalAccepts = 4
- mtu = 5000
- )
+ const mtu = 5000
ips := []struct {
name string
@@ -1033,12 +1029,6 @@ func TestUserSuppliedMSSOnListenAccept(t *testing.T) {
ip.createEP(c)
- // Set the SynRcvd threshold to force a syn cookie based accept to happen.
- opt := tcpip.TCPSynRcvdCountThresholdOption(nonSynCookieAccepts)
- if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
- t.Fatalf("SetTransportProtocolOption(%d, &%T(%d)): %s", tcp.ProtocolNumber, opt, opt, err)
- }
-
if err := c.EP.SetSockOptInt(tcpip.MaxSegOption, int(test.setMSS)); err != nil {
t.Fatalf("SetSockOptInt(MaxSegOption, %d): %s", test.setMSS, err)
}
@@ -1048,13 +1038,17 @@ func TestUserSuppliedMSSOnListenAccept(t *testing.T) {
t.Fatalf("Bind(%+v): %s:", bindAddr, err)
}
- if err := c.EP.Listen(totalAccepts); err != nil {
- t.Fatalf("Listen(%d): %s:", totalAccepts, err)
+ backlog := 5
+ // Make the number of client requests twice the backlog
+ // so that half of the connections do not use SYN cookies
+ // and the other half do.
+ clientConnects := backlog * 2
+
+ if err := c.EP.Listen(backlog); err != nil {
+ t.Fatalf("Listen(%d): %s:", backlog, err)
}
- // The first nonSynCookieAccepts packets sent will trigger a gorooutine
- // based accept. The rest will trigger a cookie based accept.
- for i := 0; i < totalAccepts; i++ {
+ for i := 0; i < clientConnects; i++ {
// Send a SYN requests.
iss := seqnum.Value(i)
srcPort := context.TestPort + uint16(i)
@@ -3087,11 +3081,9 @@ func TestSynCookiePassiveSendMSSLessThanMTU(t *testing.T) {
c := context.New(t, mtu)
defer c.Cleanup()
- // Set the SynRcvd threshold to zero to force a syn cookie based accept
- // to happen.
- opt := tcpip.TCPSynRcvdCountThresholdOption(0)
+ opt := tcpip.TCPAlwaysUseSynCookies(true)
if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
- t.Fatalf("SetTransportProtocolOption(%d, &%T(%d)): %s", tcp.ProtocolNumber, opt, opt, err)
+ t.Fatalf("SetTransportProtocolOption(%d, &%T(%t)): %s", tcp.ProtocolNumber, opt, opt, err)
}
// Create EP and start listening.
@@ -5363,7 +5355,7 @@ func TestListenBacklogFull(t *testing.T) {
}
lastPortOffset := uint16(0)
- for ; int(lastPortOffset) < listenBacklog; lastPortOffset++ {
+ for ; int(lastPortOffset) < listenBacklog+1; lastPortOffset++ {
executeHandshake(t, c, context.TestPort+lastPortOffset, false /*synCookieInUse */)
}
@@ -5671,15 +5663,13 @@ func TestListenSynRcvdQueueFull(t *testing.T) {
}
// Test acceptance.
- // Start listening.
- listenBacklog := 1
- if err := c.EP.Listen(listenBacklog); err != nil {
+ if err := c.EP.Listen(0); err != nil {
t.Fatalf("Listen failed: %s", err)
}
// Send two SYN's the first one should get a SYN-ACK, the
// second one should not get any response and is dropped as
- // the synRcvd count will be equal to backlog.
+ // the accept queue is full.
irs := seqnum.Value(context.TestInitialSequenceNumber)
c.SendPacket(nil, &context.Headers{
SrcPort: context.TestPort,
@@ -5701,23 +5691,7 @@ func TestListenSynRcvdQueueFull(t *testing.T) {
}
checker.IPv4(t, b, checker.TCP(tcpCheckers...))
- // Now execute send one more SYN. The stack should not respond as the backlog
- // is full at this point.
- //
- // NOTE: we did not complete the handshake for the previous one so the
- // accept backlog should be empty and there should be one connection in
- // synRcvd state.
- c.SendPacket(nil, &context.Headers{
- SrcPort: context.TestPort + 1,
- DstPort: context.StackPort,
- Flags: header.TCPFlagSyn,
- SeqNum: seqnum.Value(889),
- RcvWnd: 30000,
- })
- c.CheckNoPacketTimeout("unexpected packet received", 50*time.Millisecond)
-
- // Now complete the previous connection and verify that there is a connection
- // to accept.
+ // Now complete the previous connection.
// Send ACK.
c.SendPacket(nil, &context.Headers{
SrcPort: context.TestPort,
@@ -5728,11 +5702,24 @@ func TestListenSynRcvdQueueFull(t *testing.T) {
RcvWnd: 30000,
})
- // Try to accept the connections in the backlog.
+ // Verify that it is delivered to the accept queue.
we, ch := waiter.NewChannelEntry(nil)
c.WQ.EventRegister(&we, waiter.ReadableEvents)
defer c.WQ.EventUnregister(&we)
+ <-ch
+
+ // Now send one more SYN. The stack should not respond as the backlog
+ // is full at this point.
+ c.SendPacket(nil, &context.Headers{
+ SrcPort: context.TestPort + 1,
+ DstPort: context.StackPort,
+ Flags: header.TCPFlagSyn,
+ SeqNum: seqnum.Value(889),
+ RcvWnd: 30000,
+ })
+ c.CheckNoPacketTimeout("unexpected packet received", 50*time.Millisecond)
+ // Try to accept the connections in the backlog.
newEP, _, err := c.EP.Accept(nil)
if _, ok := err.(*tcpip.ErrWouldBlock); ok {
// Wait for connection to be established.
@@ -5764,11 +5751,6 @@ func TestListenBacklogFullSynCookieInUse(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
- opt := tcpip.TCPSynRcvdCountThresholdOption(1)
- if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
- t.Fatalf("SetTransportProtocolOption(%d, &%T(%d)): %s", tcp.ProtocolNumber, opt, opt, err)
- }
-
// Create TCP endpoint.
var err tcpip.Error
c.EP, err = c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &c.WQ)
@@ -5781,9 +5763,8 @@ func TestListenBacklogFullSynCookieInUse(t *testing.T) {
t.Fatalf("Bind failed: %s", err)
}
- // Start listening.
- listenBacklog := 1
- if err := c.EP.Listen(listenBacklog); err != nil {
+ // Test for SynCookies usage after filling up the backlog.
+ if err := c.EP.Listen(0); err != nil {
t.Fatalf("Listen failed: %s", err)
}
@@ -6066,7 +6047,7 @@ func TestPassiveFailedConnectionAttemptIncrement(t *testing.T) {
if err := c.EP.Bind(tcpip.FullAddress{Addr: context.StackAddr, Port: context.StackPort}); err != nil {
t.Fatalf("Bind failed: %s", err)
}
- if err := c.EP.Listen(1); err != nil {
+ if err := c.EP.Listen(0); err != nil {
t.Fatalf("Listen failed: %s", err)
}
diff --git a/pkg/tcpip/transport/tcp/tcp_timestamp_test.go b/pkg/tcpip/transport/tcp/tcp_timestamp_test.go
index 2949588ce..1deb1fe4d 100644
--- a/pkg/tcpip/transport/tcp/tcp_timestamp_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_timestamp_test.go
@@ -139,9 +139,9 @@ func timeStampEnabledAccept(t *testing.T, cookieEnabled bool, wndScale int, wndS
defer c.Cleanup()
if cookieEnabled {
- var opt tcpip.TCPSynRcvdCountThresholdOption
+ opt := tcpip.TCPAlwaysUseSynCookies(true)
if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
- t.Fatalf("SetTransportProtocolOption(%d, &%T(%d)): %s", tcp.ProtocolNumber, opt, opt, err)
+ t.Fatalf("SetTransportProtocolOption(%d, &%T(%t)): %s", tcp.ProtocolNumber, opt, opt, err)
}
}
@@ -202,9 +202,9 @@ func timeStampDisabledAccept(t *testing.T, cookieEnabled bool, wndScale int, wnd
defer c.Cleanup()
if cookieEnabled {
- var opt tcpip.TCPSynRcvdCountThresholdOption
+ opt := tcpip.TCPAlwaysUseSynCookies(true)
if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, &opt); err != nil {
- t.Fatalf("SetTransportProtocolOption(%d, &%T(%d)): %s", tcp.ProtocolNumber, opt, opt, err)
+ t.Fatalf("SetTransportProtocolOption(%d, &%T(%t)): %s", tcp.ProtocolNumber, opt, opt, err)
}
}
diff --git a/pkg/test/dockerutil/container.go b/pkg/test/dockerutil/container.go
index 41fcf4978..06152a444 100644
--- a/pkg/test/dockerutil/container.go
+++ b/pkg/test/dockerutil/container.go
@@ -434,7 +434,14 @@ func (c *Container) Wait(ctx context.Context) error {
select {
case err := <-errChan:
return err
- case <-statusChan:
+ case res := <-statusChan:
+ if res.StatusCode != 0 {
+ var msg string
+ if res.Error != nil {
+ msg = res.Error.Message
+ }
+ return fmt.Errorf("container returned non-zero status: %d, msg: %q", res.StatusCode, msg)
+ }
return nil
}
}
diff --git a/pkg/usermem/BUILD b/pkg/usermem/BUILD
index 054269b59..3dba36f12 100644
--- a/pkg/usermem/BUILD
+++ b/pkg/usermem/BUILD
@@ -1,42 +1,22 @@
load("//tools:defs.bzl", "go_library", "go_test")
-load("//tools/go_generics:defs.bzl", "go_template_instance")
package(licenses = ["notice"])
-go_template_instance(
- name = "addr_range",
- out = "addr_range.go",
- package = "usermem",
- prefix = "Addr",
- template = "//pkg/segment:generic_range",
- types = {
- "T": "Addr",
- },
-)
-
go_library(
name = "usermem",
srcs = [
- "access_type.go",
- "addr.go",
- "addr_range.go",
- "addr_range_seq_unsafe.go",
"bytes_io.go",
"bytes_io_unsafe.go",
"usermem.go",
- "usermem_arm64.go",
- "usermem_x86.go",
],
visibility = ["//:sandbox"],
deps = [
"//pkg/atomicbitops",
- "//pkg/binary",
"//pkg/context",
"//pkg/gohacks",
- "//pkg/log",
+ "//pkg/hostarch",
"//pkg/safemem",
"//pkg/syserror",
- "@org_golang_x_sys//unix:go_default_library",
],
)
@@ -44,12 +24,12 @@ go_test(
name = "usermem_test",
size = "small",
srcs = [
- "addr_range_seq_test.go",
"usermem_test.go",
],
library = ":usermem",
deps = [
"//pkg/context",
+ "//pkg/hostarch",
"//pkg/safemem",
"//pkg/syserror",
],
diff --git a/pkg/usermem/bytes_io.go b/pkg/usermem/bytes_io.go
index e177d30eb..3da3c0294 100644
--- a/pkg/usermem/bytes_io.go
+++ b/pkg/usermem/bytes_io.go
@@ -16,6 +16,7 @@ package usermem
import (
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/syserror"
)
@@ -30,7 +31,7 @@ type BytesIO struct {
}
// CopyOut implements IO.CopyOut.
-func (b *BytesIO) CopyOut(ctx context.Context, addr Addr, src []byte, opts IOOpts) (int, error) {
+func (b *BytesIO) CopyOut(ctx context.Context, addr hostarch.Addr, src []byte, opts IOOpts) (int, error) {
rngN, rngErr := b.rangeCheck(addr, len(src))
if rngN == 0 {
return 0, rngErr
@@ -39,7 +40,7 @@ func (b *BytesIO) CopyOut(ctx context.Context, addr Addr, src []byte, opts IOOpt
}
// CopyIn implements IO.CopyIn.
-func (b *BytesIO) CopyIn(ctx context.Context, addr Addr, dst []byte, opts IOOpts) (int, error) {
+func (b *BytesIO) CopyIn(ctx context.Context, addr hostarch.Addr, dst []byte, opts IOOpts) (int, error) {
rngN, rngErr := b.rangeCheck(addr, len(dst))
if rngN == 0 {
return 0, rngErr
@@ -48,7 +49,7 @@ func (b *BytesIO) CopyIn(ctx context.Context, addr Addr, dst []byte, opts IOOpts
}
// ZeroOut implements IO.ZeroOut.
-func (b *BytesIO) ZeroOut(ctx context.Context, addr Addr, toZero int64, opts IOOpts) (int64, error) {
+func (b *BytesIO) ZeroOut(ctx context.Context, addr hostarch.Addr, toZero int64, opts IOOpts) (int64, error) {
if toZero > int64(maxInt) {
return 0, syserror.EINVAL
}
@@ -64,7 +65,7 @@ func (b *BytesIO) ZeroOut(ctx context.Context, addr Addr, toZero int64, opts IOO
}
// CopyOutFrom implements IO.CopyOutFrom.
-func (b *BytesIO) CopyOutFrom(ctx context.Context, ars AddrRangeSeq, src safemem.Reader, opts IOOpts) (int64, error) {
+func (b *BytesIO) CopyOutFrom(ctx context.Context, ars hostarch.AddrRangeSeq, src safemem.Reader, opts IOOpts) (int64, error) {
dsts, rngErr := b.blocksFromAddrRanges(ars)
n, err := src.ReadToBlocks(dsts)
if err != nil {
@@ -74,7 +75,7 @@ func (b *BytesIO) CopyOutFrom(ctx context.Context, ars AddrRangeSeq, src safemem
}
// CopyInTo implements IO.CopyInTo.
-func (b *BytesIO) CopyInTo(ctx context.Context, ars AddrRangeSeq, dst safemem.Writer, opts IOOpts) (int64, error) {
+func (b *BytesIO) CopyInTo(ctx context.Context, ars hostarch.AddrRangeSeq, dst safemem.Writer, opts IOOpts) (int64, error) {
srcs, rngErr := b.blocksFromAddrRanges(ars)
n, err := dst.WriteFromBlocks(srcs)
if err != nil {
@@ -83,14 +84,14 @@ func (b *BytesIO) CopyInTo(ctx context.Context, ars AddrRangeSeq, dst safemem.Wr
return int64(n), rngErr
}
-func (b *BytesIO) rangeCheck(addr Addr, length int) (int, error) {
+func (b *BytesIO) rangeCheck(addr hostarch.Addr, length int) (int, error) {
if length == 0 {
return 0, nil
}
if length < 0 {
return 0, syserror.EINVAL
}
- max := Addr(len(b.Bytes))
+ max := hostarch.Addr(len(b.Bytes))
if addr >= max {
return 0, syserror.EFAULT
}
@@ -101,7 +102,7 @@ func (b *BytesIO) rangeCheck(addr Addr, length int) (int, error) {
return length, nil
}
-func (b *BytesIO) blocksFromAddrRanges(ars AddrRangeSeq) (safemem.BlockSeq, error) {
+func (b *BytesIO) blocksFromAddrRanges(ars hostarch.AddrRangeSeq) (safemem.BlockSeq, error) {
switch ars.NumRanges() {
case 0:
return safemem.BlockSeq{}, nil
@@ -124,7 +125,7 @@ func (b *BytesIO) blocksFromAddrRanges(ars AddrRangeSeq) (safemem.BlockSeq, erro
}
}
-func (b *BytesIO) blockFromAddrRange(ar AddrRange) (safemem.Block, error) {
+func (b *BytesIO) blockFromAddrRange(ar hostarch.AddrRange) (safemem.Block, error) {
n, err := b.rangeCheck(ar.Start, int(ar.Length()))
if n == 0 {
return safemem.Block{}, err
@@ -136,6 +137,6 @@ func (b *BytesIO) blockFromAddrRange(ar AddrRange) (safemem.Block, error) {
func BytesIOSequence(buf []byte) IOSequence {
return IOSequence{
IO: &BytesIO{buf},
- Addrs: AddrRangeSeqOf(AddrRange{0, Addr(len(buf))}),
+ Addrs: hostarch.AddrRangeSeqOf(hostarch.AddrRange{0, hostarch.Addr(len(buf))}),
}
}
diff --git a/pkg/usermem/bytes_io_unsafe.go b/pkg/usermem/bytes_io_unsafe.go
index 20de5037d..dcd5c81d1 100644
--- a/pkg/usermem/bytes_io_unsafe.go
+++ b/pkg/usermem/bytes_io_unsafe.go
@@ -20,10 +20,11 @@ import (
"gvisor.dev/gvisor/pkg/atomicbitops"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
)
// SwapUint32 implements IO.SwapUint32.
-func (b *BytesIO) SwapUint32(ctx context.Context, addr Addr, new uint32, opts IOOpts) (uint32, error) {
+func (b *BytesIO) SwapUint32(ctx context.Context, addr hostarch.Addr, new uint32, opts IOOpts) (uint32, error) {
if _, rngErr := b.rangeCheck(addr, 4); rngErr != nil {
return 0, rngErr
}
@@ -31,7 +32,7 @@ func (b *BytesIO) SwapUint32(ctx context.Context, addr Addr, new uint32, opts IO
}
// CompareAndSwapUint32 implements IO.CompareAndSwapUint32.
-func (b *BytesIO) CompareAndSwapUint32(ctx context.Context, addr Addr, old, new uint32, opts IOOpts) (uint32, error) {
+func (b *BytesIO) CompareAndSwapUint32(ctx context.Context, addr hostarch.Addr, old, new uint32, opts IOOpts) (uint32, error) {
if _, rngErr := b.rangeCheck(addr, 4); rngErr != nil {
return 0, rngErr
}
@@ -39,7 +40,7 @@ func (b *BytesIO) CompareAndSwapUint32(ctx context.Context, addr Addr, old, new
}
// LoadUint32 implements IO.LoadUint32.
-func (b *BytesIO) LoadUint32(ctx context.Context, addr Addr, opts IOOpts) (uint32, error) {
+func (b *BytesIO) LoadUint32(ctx context.Context, addr hostarch.Addr, opts IOOpts) (uint32, error) {
if _, err := b.rangeCheck(addr, 4); err != nil {
return 0, err
}
diff --git a/pkg/usermem/usermem.go b/pkg/usermem/usermem.go
index dc2571154..0d6d25e50 100644
--- a/pkg/usermem/usermem.go
+++ b/pkg/usermem/usermem.go
@@ -25,6 +25,8 @@ import (
"gvisor.dev/gvisor/pkg/gohacks"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/syserror"
+
+ "gvisor.dev/gvisor/pkg/hostarch"
)
// IO provides access to the contents of a virtual memory space.
@@ -37,7 +39,7 @@ type IO interface {
// any following locks in the lock order.
//
// Postconditions: CopyOut does not retain src.
- CopyOut(ctx context.Context, addr Addr, src []byte, opts IOOpts) (int, error)
+ CopyOut(ctx context.Context, addr hostarch.Addr, src []byte, opts IOOpts) (int, error)
// CopyIn copies len(dst) bytes from the memory mapped at addr to dst.
// It returns the number of bytes copied. If the number of bytes copied is
@@ -47,7 +49,7 @@ type IO interface {
// any following locks in the lock order.
//
// Postconditions: CopyIn does not retain dst.
- CopyIn(ctx context.Context, addr Addr, dst []byte, opts IOOpts) (int, error)
+ CopyIn(ctx context.Context, addr hostarch.Addr, dst []byte, opts IOOpts) (int, error)
// ZeroOut sets toZero bytes to 0, starting at addr. It returns the number
// of bytes zeroed. If the number of bytes zeroed is < toZero, it returns a
@@ -57,7 +59,7 @@ type IO interface {
// * The caller must not hold mm.MemoryManager.mappingMu or any
// following locks in the lock order.
// * toZero >= 0.
- ZeroOut(ctx context.Context, addr Addr, toZero int64, opts IOOpts) (int64, error)
+ ZeroOut(ctx context.Context, addr hostarch.Addr, toZero int64, opts IOOpts) (int64, error)
// CopyOutFrom copies ars.NumBytes() bytes from src to the memory mapped at
// ars. It returns the number of bytes copied, which may be less than the
@@ -72,7 +74,7 @@ type IO interface {
// following locks in the lock order.
// * src.ReadToBlocks must not block on mm.MemoryManager.activeMu or
// any preceding locks in the lock order.
- CopyOutFrom(ctx context.Context, ars AddrRangeSeq, src safemem.Reader, opts IOOpts) (int64, error)
+ CopyOutFrom(ctx context.Context, ars hostarch.AddrRangeSeq, src safemem.Reader, opts IOOpts) (int64, error)
// CopyInTo copies ars.NumBytes() bytes from the memory mapped at ars to
// dst. It returns the number of bytes copied. CopyInTo may return a
@@ -86,7 +88,7 @@ type IO interface {
// following locks in the lock order.
// * dst.WriteFromBlocks must not block on mm.MemoryManager.activeMu or
// any preceding locks in the lock order.
- CopyInTo(ctx context.Context, ars AddrRangeSeq, dst safemem.Writer, opts IOOpts) (int64, error)
+ CopyInTo(ctx context.Context, ars hostarch.AddrRangeSeq, dst safemem.Writer, opts IOOpts) (int64, error)
// TODO(jamieliu): The requirement that CopyOutFrom/CopyInTo call src/dst
// at most once, which is unnecessary in most cases, forces implementations
@@ -101,7 +103,7 @@ type IO interface {
// * The caller must not hold mm.MemoryManager.mappingMu or any
// following locks in the lock order.
// * addr must be aligned to a 4-byte boundary.
- SwapUint32(ctx context.Context, addr Addr, new uint32, opts IOOpts) (uint32, error)
+ SwapUint32(ctx context.Context, addr hostarch.Addr, new uint32, opts IOOpts) (uint32, error)
// CompareAndSwapUint32 atomically compares the uint32 value at addr to
// old; if they are equal, the value in memory is replaced by new. In
@@ -111,7 +113,7 @@ type IO interface {
// * The caller must not hold mm.MemoryManager.mappingMu or any
// following locks in the lock order.
// * addr must be aligned to a 4-byte boundary.
- CompareAndSwapUint32(ctx context.Context, addr Addr, old, new uint32, opts IOOpts) (uint32, error)
+ CompareAndSwapUint32(ctx context.Context, addr hostarch.Addr, old, new uint32, opts IOOpts) (uint32, error)
// LoadUint32 atomically loads the uint32 value at addr and returns it.
//
@@ -119,7 +121,7 @@ type IO interface {
// * The caller must not hold mm.MemoryManager.mappingMu or any
// following locks in the lock order.
// * addr must be aligned to a 4-byte boundary.
- LoadUint32(ctx context.Context, addr Addr, opts IOOpts) (uint32, error)
+ LoadUint32(ctx context.Context, addr hostarch.Addr, opts IOOpts) (uint32, error)
}
// IOOpts contains options applicable to all IO methods.
@@ -142,7 +144,7 @@ type IOOpts struct {
type IOReadWriter struct {
Ctx context.Context
IO IO
- Addr Addr
+ Addr hostarch.Addr
Opts IOOpts
}
@@ -159,7 +161,7 @@ func (rw *IOReadWriter) Read(dst []byte) (int, error) {
rw.Addr = end
} else {
// Disallow wraparound.
- rw.Addr = ^Addr(0)
+ rw.Addr = ^hostarch.Addr(0)
if err != nil {
err = syserror.EFAULT
}
@@ -175,7 +177,7 @@ func (rw *IOReadWriter) Write(src []byte) (int, error) {
rw.Addr = end
} else {
// Disallow wraparound.
- rw.Addr = ^Addr(0)
+ rw.Addr = ^hostarch.Addr(0)
if err != nil {
err = syserror.EFAULT
}
@@ -197,7 +199,7 @@ const (
//
// Preconditions: Same as IO.CopyFromUser, plus:
// * maxlen >= 0.
-func CopyStringIn(ctx context.Context, uio IO, addr Addr, maxlen int, opts IOOpts) (string, error) {
+func CopyStringIn(ctx context.Context, uio IO, addr hostarch.Addr, maxlen int, opts IOOpts) (string, error) {
initLen := maxlen
if initLen > copyStringMaxInitBufLen {
initLen = copyStringMaxInitBufLen
@@ -251,12 +253,12 @@ func CopyStringIn(ctx context.Context, uio IO, addr Addr, maxlen int, opts IOOpt
// the maximum, it returns a non-nil error explaining why.
//
// Preconditions: Same as IO.CopyOut.
-func CopyOutVec(ctx context.Context, uio IO, ars AddrRangeSeq, src []byte, opts IOOpts) (int, error) {
+func CopyOutVec(ctx context.Context, uio IO, ars hostarch.AddrRangeSeq, src []byte, opts IOOpts) (int, error) {
var done int
for !ars.IsEmpty() && done < len(src) {
ar := ars.Head()
cplen := len(src) - done
- if Addr(cplen) >= ar.Length() {
+ if hostarch.Addr(cplen) >= ar.Length() {
cplen = int(ar.Length())
}
n, err := uio.CopyOut(ctx, ar.Start, src[done:done+cplen], opts)
@@ -275,12 +277,12 @@ func CopyOutVec(ctx context.Context, uio IO, ars AddrRangeSeq, src []byte, opts
// maximum, it returns a non-nil error explaining why.
//
// Preconditions: Same as IO.CopyIn.
-func CopyInVec(ctx context.Context, uio IO, ars AddrRangeSeq, dst []byte, opts IOOpts) (int, error) {
+func CopyInVec(ctx context.Context, uio IO, ars hostarch.AddrRangeSeq, dst []byte, opts IOOpts) (int, error) {
var done int
for !ars.IsEmpty() && done < len(dst) {
ar := ars.Head()
cplen := len(dst) - done
- if Addr(cplen) >= ar.Length() {
+ if hostarch.Addr(cplen) >= ar.Length() {
cplen = int(ar.Length())
}
n, err := uio.CopyIn(ctx, ar.Start, dst[done:done+cplen], opts)
@@ -299,12 +301,12 @@ func CopyInVec(ctx context.Context, uio IO, ars AddrRangeSeq, dst []byte, opts I
// maximum, it returns a non-nil error explaining why.
//
// Preconditions: Same as IO.ZeroOut.
-func ZeroOutVec(ctx context.Context, uio IO, ars AddrRangeSeq, toZero int64, opts IOOpts) (int64, error) {
+func ZeroOutVec(ctx context.Context, uio IO, ars hostarch.AddrRangeSeq, toZero int64, opts IOOpts) (int64, error) {
var done int64
for !ars.IsEmpty() && done < toZero {
ar := ars.Head()
cplen := toZero - done
- if Addr(cplen) >= ar.Length() {
+ if hostarch.Addr(cplen) >= ar.Length() {
cplen = int64(ar.Length())
}
n, err := uio.ZeroOut(ctx, ar.Start, cplen, opts)
@@ -352,7 +354,7 @@ func isASCIIWhitespace(b byte) bool {
// - CopyInt32StringsInVec returns EINVAL if ars.NumBytes() == 0.
//
// Preconditions: Same as CopyInVec.
-func CopyInt32StringsInVec(ctx context.Context, uio IO, ars AddrRangeSeq, dsts []int32, opts IOOpts) (int64, error) {
+func CopyInt32StringsInVec(ctx context.Context, uio IO, ars hostarch.AddrRangeSeq, dsts []int32, opts IOOpts) (int64, error) {
if len(dsts) == 0 {
return 0, nil
}
@@ -403,7 +405,7 @@ func CopyInt32StringsInVec(ctx context.Context, uio IO, ars AddrRangeSeq, dsts [
// CopyInt32StringInVec is equivalent to CopyInt32StringsInVec, but copies at
// most one int32.
-func CopyInt32StringInVec(ctx context.Context, uio IO, ars AddrRangeSeq, dst *int32, opts IOOpts) (int64, error) {
+func CopyInt32StringInVec(ctx context.Context, uio IO, ars hostarch.AddrRangeSeq, dst *int32, opts IOOpts) (int64, error) {
dsts := [1]int32{*dst}
n, err := CopyInt32StringsInVec(ctx, uio, ars, dsts[:], opts)
*dst = dsts[0]
@@ -413,7 +415,7 @@ func CopyInt32StringInVec(ctx context.Context, uio IO, ars AddrRangeSeq, dst *in
// IOSequence holds arguments to IO methods.
type IOSequence struct {
IO IO
- Addrs AddrRangeSeq
+ Addrs hostarch.AddrRangeSeq
Opts IOOpts
}
@@ -444,28 +446,28 @@ func (s IOSequence) NumBytes() int64 {
// DropFirst returns a copy of s with s.Addrs.DropFirst(n).
//
-// Preconditions: Same as AddrRangeSeq.DropFirst.
+// Preconditions: Same as hostarch.AddrRangeSeq.DropFirst.
func (s IOSequence) DropFirst(n int) IOSequence {
return IOSequence{s.IO, s.Addrs.DropFirst(n), s.Opts}
}
// DropFirst64 returns a copy of s with s.Addrs.DropFirst64(n).
//
-// Preconditions: Same as AddrRangeSeq.DropFirst64.
+// Preconditions: Same as hostarch.AddrRangeSeq.DropFirst64.
func (s IOSequence) DropFirst64(n int64) IOSequence {
return IOSequence{s.IO, s.Addrs.DropFirst64(n), s.Opts}
}
// TakeFirst returns a copy of s with s.Addrs.TakeFirst(n).
//
-// Preconditions: Same as AddrRangeSeq.TakeFirst.
+// Preconditions: Same as hostarch.AddrRangeSeq.TakeFirst.
func (s IOSequence) TakeFirst(n int) IOSequence {
return IOSequence{s.IO, s.Addrs.TakeFirst(n), s.Opts}
}
// TakeFirst64 returns a copy of s with s.Addrs.TakeFirst64(n).
//
-// Preconditions: Same as AddrRangeSeq.TakeFirst64.
+// Preconditions: Same as hostarch.AddrRangeSeq.TakeFirst64.
func (s IOSequence) TakeFirst64(n int64) IOSequence {
return IOSequence{s.IO, s.Addrs.TakeFirst64(n), s.Opts}
}
diff --git a/pkg/usermem/usermem_test.go b/pkg/usermem/usermem_test.go
index da60b0cc7..9b697b593 100644
--- a/pkg/usermem/usermem_test.go
+++ b/pkg/usermem/usermem_test.go
@@ -22,6 +22,7 @@ import (
"testing"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/syserror"
)
@@ -106,7 +107,7 @@ func TestBytesIOZeroOutFailure(t *testing.T) {
func TestBytesIOCopyOutFromSuccess(t *testing.T) {
b := newBytesIOString("ABCDEFGH")
- n, err := b.CopyOutFrom(newContext(), AddrRangeSeqFromSlice([]AddrRange{
+ n, err := b.CopyOutFrom(newContext(), hostarch.AddrRangeSeqFromSlice([]hostarch.AddrRange{
{Start: 4, End: 7},
{Start: 1, End: 4},
}), safemem.FromIOReader{bytes.NewBufferString("barfoo")}, IOOpts{})
@@ -120,7 +121,7 @@ func TestBytesIOCopyOutFromSuccess(t *testing.T) {
func TestBytesIOCopyOutFromFailure(t *testing.T) {
b := newBytesIOString("ABCDE")
- n, err := b.CopyOutFrom(newContext(), AddrRangeSeqFromSlice([]AddrRange{
+ n, err := b.CopyOutFrom(newContext(), hostarch.AddrRangeSeqFromSlice([]hostarch.AddrRange{
{Start: 1, End: 4},
{Start: 4, End: 7},
}), safemem.FromIOReader{bytes.NewBufferString("foobar")}, IOOpts{})
@@ -135,7 +136,7 @@ func TestBytesIOCopyOutFromFailure(t *testing.T) {
func TestBytesIOCopyInToSuccess(t *testing.T) {
b := newBytesIOString("AfoobarH")
var dst bytes.Buffer
- n, err := b.CopyInTo(newContext(), AddrRangeSeqFromSlice([]AddrRange{
+ n, err := b.CopyInTo(newContext(), hostarch.AddrRangeSeqFromSlice([]hostarch.AddrRange{
{Start: 4, End: 7},
{Start: 1, End: 4},
}), safemem.FromIOWriter{&dst}, IOOpts{})
@@ -150,7 +151,7 @@ func TestBytesIOCopyInToSuccess(t *testing.T) {
func TestBytesIOCopyInToFailure(t *testing.T) {
b := newBytesIOString("Afoob")
var dst bytes.Buffer
- n, err := b.CopyInTo(newContext(), AddrRangeSeqFromSlice([]AddrRange{
+ n, err := b.CopyInTo(newContext(), hostarch.AddrRangeSeqFromSlice([]hostarch.AddrRange{
{Start: 1, End: 4},
{Start: 4, End: 7},
}), safemem.FromIOWriter{&dst}, IOOpts{})
diff --git a/runsc/BUILD b/runsc/BUILD
index 3b91b984a..e99404eb1 100644
--- a/runsc/BUILD
+++ b/runsc/BUILD
@@ -9,6 +9,7 @@ go_binary(
"version.go",
],
pure = True,
+ tags = ["staging"],
visibility = [
"//visibility:public",
],
diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD
index 67307ab3c..579edaa2c 100644
--- a/runsc/boot/BUILD
+++ b/runsc/boot/BUILD
@@ -57,6 +57,7 @@ go_library(
"//pkg/sentry/fs/tmpfs",
"//pkg/sentry/fs/tty",
"//pkg/sentry/fs/user",
+ "//pkg/sentry/fsimpl/cgroupfs",
"//pkg/sentry/fsimpl/devpts",
"//pkg/sentry/fsimpl/devtmpfs",
"//pkg/sentry/fsimpl/fuse",
@@ -66,6 +67,7 @@ go_library(
"//pkg/sentry/fsimpl/proc",
"//pkg/sentry/fsimpl/sys",
"//pkg/sentry/fsimpl/tmpfs",
+ "//pkg/sentry/fsimpl/verity",
"//pkg/sentry/inet",
"//pkg/sentry/kernel",
"//pkg/sentry/kernel:uncaught_signal_go_proto",
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go
index 1ae76d7d7..05b721b28 100644
--- a/runsc/boot/controller.go
+++ b/runsc/boot/controller.go
@@ -400,7 +400,7 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error {
// Set up the restore environment.
ctx := k.SupervisorContext()
- mntr := newContainerMounter(cm.l.root.spec, cm.l.root.goferFDs, cm.l.k, cm.l.mountHints, kernel.VFS2Enabled)
+ mntr := newContainerMounter(&cm.l.root, cm.l.k, cm.l.mountHints, kernel.VFS2Enabled)
if kernel.VFS2Enabled {
ctx, err = mntr.configureRestore(ctx, cm.l.root.conf)
if err != nil {
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go
index 32adde643..3c0cef6db 100644
--- a/runsc/boot/fs.go
+++ b/runsc/boot/fs.go
@@ -31,6 +31,7 @@ import (
"gvisor.dev/gvisor/pkg/sentry/fs/gofer"
"gvisor.dev/gvisor/pkg/sentry/fs/ramfs"
"gvisor.dev/gvisor/pkg/sentry/fs/user"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/cgroupfs"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/devpts"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs"
gofervfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/gofer"
@@ -103,17 +104,22 @@ func addOverlay(ctx context.Context, conf *config.Config, lower *fs.Inode, name
// compileMounts returns the supported mounts from the mount spec, adding any
// mandatory mounts that are required by the OCI specification.
-func compileMounts(spec *specs.Spec, vfs2Enabled bool) []specs.Mount {
+func compileMounts(spec *specs.Spec, conf *config.Config, vfs2Enabled bool) []specs.Mount {
// Keep track of whether proc and sys were mounted.
var procMounted, sysMounted, devMounted, devptsMounted bool
var mounts []specs.Mount
// Mount all submounts from the spec.
for _, m := range spec.Mounts {
- if !vfs2Enabled && !specutils.IsVFS1SupportedDevMount(m) {
+ if !specutils.IsSupportedDevMount(m, vfs2Enabled) {
log.Warningf("ignoring dev mount at %q", m.Destination)
continue
}
+ // Unconditionally drop any cgroupfs mounts. If requested, we'll add our
+ // own below.
+ if m.Type == cgroupfs.Name {
+ continue
+ }
switch filepath.Clean(m.Destination) {
case "/proc":
procMounted = true
@@ -132,6 +138,24 @@ func compileMounts(spec *specs.Spec, vfs2Enabled bool) []specs.Mount {
// Mount proc and sys even if the user did not ask for it, as the spec
// says we SHOULD.
var mandatoryMounts []specs.Mount
+
+ if conf.Cgroupfs {
+ mandatoryMounts = append(mandatoryMounts, specs.Mount{
+ Type: tmpfsvfs2.Name,
+ Destination: "/sys/fs/cgroup",
+ })
+ mandatoryMounts = append(mandatoryMounts, specs.Mount{
+ Type: cgroupfs.Name,
+ Destination: "/sys/fs/cgroup/memory",
+ Options: []string{"memory"},
+ })
+ mandatoryMounts = append(mandatoryMounts, specs.Mount{
+ Type: cgroupfs.Name,
+ Destination: "/sys/fs/cgroup/cpu",
+ Options: []string{"cpu"},
+ })
+ }
+
if !procMounted {
mandatoryMounts = append(mandatoryMounts, specs.Mount{
Type: procvfs2.Name,
@@ -248,6 +272,10 @@ func isSupportedMountFlag(fstype, opt string) bool {
ok, err := parseMountOption(opt, tmpfsAllowedData...)
return ok && err == nil
}
+ if fstype == cgroupfs.Name {
+ ok, err := parseMountOption(opt, cgroupfs.SupportedMountOptions...)
+ return ok && err == nil
+ }
return false
}
@@ -572,11 +600,11 @@ type containerMounter struct {
hints *podMountHints
}
-func newContainerMounter(spec *specs.Spec, goferFDs []*fd.FD, k *kernel.Kernel, hints *podMountHints, vfs2Enabled bool) *containerMounter {
+func newContainerMounter(info *containerInfo, k *kernel.Kernel, hints *podMountHints, vfs2Enabled bool) *containerMounter {
return &containerMounter{
- root: spec.Root,
- mounts: compileMounts(spec, vfs2Enabled),
- fds: fdDispenser{fds: goferFDs},
+ root: info.spec.Root,
+ mounts: compileMounts(info.spec, info.conf, vfs2Enabled),
+ fds: fdDispenser{fds: info.goferFDs},
k: k,
hints: hints,
}
@@ -795,7 +823,13 @@ func (c *containerMounter) getMountNameAndOptions(conf *config.Config, m specs.M
opts = p9MountData(fd, c.getMountAccessType(conf, m), conf.VFS2)
// If configured, add overlay to all writable mounts.
useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly
-
+ case cgroupfs.Name:
+ fsName = m.Type
+ var err error
+ opts, err = parseAndFilterOptions(m.Options, cgroupfs.SupportedMountOptions...)
+ if err != nil {
+ return "", nil, false, err
+ }
default:
log.Warningf("ignoring unknown filesystem type %q", m.Type)
}
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 774621970..95daf1f00 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -752,7 +752,7 @@ func (l *Loader) createContainerProcess(root bool, cid string, info *containerIn
// Setup the child container file system.
l.startGoferMonitor(cid, info.goferFDs)
- mntr := newContainerMounter(info.spec, info.goferFDs, l.k, l.mountHints, kernel.VFS2Enabled)
+ mntr := newContainerMounter(info, l.k, l.mountHints, kernel.VFS2Enabled)
if root {
if err := mntr.processHints(info.conf, info.procArgs.Credentials); err != nil {
return nil, nil, nil, err
diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go
index 8b39bc59a..93c476971 100644
--- a/runsc/boot/loader_test.go
+++ b/runsc/boot/loader_test.go
@@ -439,7 +439,13 @@ func TestCreateMountNamespace(t *testing.T) {
}
defer cleanup()
- mntr := newContainerMounter(&tc.spec, []*fd.FD{fd.New(sandEnd)}, nil, &podMountHints{}, false /* vfs2Enabled */)
+ info := containerInfo{
+ conf: conf,
+ spec: &tc.spec,
+ goferFDs: []*fd.FD{fd.New(sandEnd)},
+ }
+
+ mntr := newContainerMounter(&info, nil, &podMountHints{}, false /* vfs2Enabled */)
mns, err := mntr.createMountNamespace(ctx, conf)
if err != nil {
t.Fatalf("failed to create mount namespace: %v", err)
@@ -479,7 +485,7 @@ func TestCreateMountNamespaceVFS2(t *testing.T) {
defer l.Destroy()
defer loaderCleanup()
- mntr := newContainerMounter(l.root.spec, l.root.goferFDs, l.k, l.mountHints, true /* vfs2Enabled */)
+ mntr := newContainerMounter(&l.root, l.k, l.mountHints, true /* vfs2Enabled */)
if err := mntr.processHints(l.root.conf, l.root.procArgs.Credentials); err != nil {
t.Fatalf("failed process hints: %v", err)
}
@@ -702,7 +708,12 @@ func TestRestoreEnvironment(t *testing.T) {
for _, ioFD := range tc.ioFDs {
ioFDs = append(ioFDs, fd.New(ioFD))
}
- mntr := newContainerMounter(tc.spec, ioFDs, nil, &podMountHints{}, false /* vfs2Enabled */)
+ info := containerInfo{
+ conf: conf,
+ spec: tc.spec,
+ goferFDs: ioFDs,
+ }
+ mntr := newContainerMounter(&info, nil, &podMountHints{}, false /* vfs2Enabled */)
actualRenv, err := mntr.createRestoreEnvironment(conf)
if !tc.errorExpected && err != nil {
t.Fatalf("could not create restore environment for test:%s", tc.name)
diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go
index 9b3dacf46..7d8fd0483 100644
--- a/runsc/boot/vfs.go
+++ b/runsc/boot/vfs.go
@@ -16,6 +16,7 @@ package boot
import (
"fmt"
+ "path"
"sort"
"strings"
@@ -29,6 +30,7 @@ import (
"gvisor.dev/gvisor/pkg/sentry/devices/ttydev"
"gvisor.dev/gvisor/pkg/sentry/devices/tundev"
"gvisor.dev/gvisor/pkg/sentry/fs/user"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/cgroupfs"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/devpts"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/fuse"
@@ -37,6 +39,7 @@ import (
"gvisor.dev/gvisor/pkg/sentry/fsimpl/proc"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/sys"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/verity"
"gvisor.dev/gvisor/pkg/sentry/inet"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
@@ -50,6 +53,10 @@ func registerFilesystems(k *kernel.Kernel) error {
creds := auth.NewRootCredentials(k.RootUserNamespace())
vfsObj := k.VFS()
+ vfsObj.MustRegisterFilesystemType(cgroupfs.Name, &cgroupfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+ AllowUserMount: true,
+ AllowUserList: true,
+ })
vfsObj.MustRegisterFilesystemType(devpts.Name, &devpts.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
AllowUserList: true,
// TODO(b/29356795): Users may mount this once the terminals are in a
@@ -60,6 +67,10 @@ func registerFilesystems(k *kernel.Kernel) error {
AllowUserMount: true,
AllowUserList: true,
})
+ vfsObj.MustRegisterFilesystemType(fuse.Name, &fuse.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+ AllowUserMount: true,
+ AllowUserList: true,
+ })
vfsObj.MustRegisterFilesystemType(gofer.Name, &gofer.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
AllowUserList: true,
})
@@ -79,9 +90,9 @@ func registerFilesystems(k *kernel.Kernel) error {
AllowUserMount: true,
AllowUserList: true,
})
- vfsObj.MustRegisterFilesystemType(fuse.Name, &fuse.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
- AllowUserMount: true,
+ vfsObj.MustRegisterFilesystemType(verity.Name, &verity.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
AllowUserList: true,
+ AllowUserMount: true,
})
// Setup files in devtmpfs.
@@ -472,6 +483,12 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mo
var data []string
var iopts interface{}
+ verityData, verityOpts, verityRequested, remainingMOpts, err := parseVerityMountOptions(m.Options)
+ if err != nil {
+ return "", nil, false, err
+ }
+ m.Options = remainingMOpts
+
// Find filesystem name and FS specific data field.
switch m.Type {
case devpts.Name, devtmpfs.Name, proc.Name, sys.Name:
@@ -502,6 +519,13 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mo
// If configured, add overlay to all writable mounts.
useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly
+ case cgroupfs.Name:
+ var err error
+ data, err = parseAndFilterOptions(m.Options, cgroupfs.SupportedMountOptions...)
+ if err != nil {
+ return "", nil, false, err
+ }
+
default:
log.Warningf("ignoring unknown filesystem type %q", m.Type)
return "", nil, false, nil
@@ -530,9 +554,75 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mo
}
}
+ if verityRequested {
+ verityData = verityData + "root_name=" + path.Base(m.Mount.Destination)
+ verityOpts.LowerName = fsName
+ verityOpts.LowerGetFSOptions = opts.GetFilesystemOptions
+ fsName = verity.Name
+ opts = &vfs.MountOptions{
+ GetFilesystemOptions: vfs.GetFilesystemOptions{
+ Data: verityData,
+ InternalData: verityOpts,
+ },
+ InternalMount: true,
+ }
+ }
+
return fsName, opts, useOverlay, nil
}
+func parseKeyValue(s string) (string, string, bool) {
+ tokens := strings.SplitN(s, "=", 2)
+ if len(tokens) < 2 {
+ return "", "", false
+ }
+ return strings.TrimSpace(tokens[0]), strings.TrimSpace(tokens[1]), true
+}
+
+// parseVerityMountOptions scans the provided mount options for verity-related
+// mount options. It returns the parsed set of verity mount options, as well as
+// the filtered set of mount options unrelated to verity.
+func parseVerityMountOptions(mopts []string) (string, verity.InternalFilesystemOptions, bool, []string, error) {
+ nonVerity := []string{}
+ found := false
+ var rootHash string
+ verityOpts := verity.InternalFilesystemOptions{
+ Action: verity.PanicOnViolation,
+ }
+ for _, o := range mopts {
+ if !strings.HasPrefix(o, "verity.") {
+ nonVerity = append(nonVerity, o)
+ continue
+ }
+
+ k, v, ok := parseKeyValue(o)
+ if !ok {
+ return "", verityOpts, found, nonVerity, fmt.Errorf("invalid verity mount option with no value: %q", o)
+ }
+
+ found = true
+ switch k {
+ case "verity.roothash":
+ rootHash = v
+ case "verity.action":
+ switch v {
+ case "error":
+ verityOpts.Action = verity.ErrorOnViolation
+ case "panic":
+ verityOpts.Action = verity.PanicOnViolation
+ default:
+ log.Warningf("Invalid verity action %q", v)
+ verityOpts.Action = verity.PanicOnViolation
+ }
+ default:
+ return "", verityOpts, found, nonVerity, fmt.Errorf("unknown verity mount option: %q", k)
+ }
+ }
+ verityOpts.AllowRuntimeEnable = len(rootHash) == 0
+ verityData := "root_hash=" + rootHash + ","
+ return verityData, verityOpts, found, nonVerity, nil
+}
+
// mountTmpVFS2 mounts an internal tmpfs at '/tmp' if it's safe to do so.
// Technically we don't have to mount tmpfs at /tmp, as we could just rely on
// the host /tmp, but this is a nice optimization, and fixes some apps that call
diff --git a/runsc/cli/main.go b/runsc/cli/main.go
index a3c515f4b..6db6614cc 100644
--- a/runsc/cli/main.go
+++ b/runsc/cli/main.go
@@ -86,6 +86,7 @@ func Main(version string) {
subcommands.Register(new(cmd.Symbolize), "")
subcommands.Register(new(cmd.Wait), "")
subcommands.Register(new(cmd.Mitigate), "")
+ subcommands.Register(new(cmd.VerityPrepare), "")
// Register internal commands with the internal group name. This causes
// them to be sorted below the user-facing commands with empty group.
diff --git a/runsc/cmd/BUILD b/runsc/cmd/BUILD
index 2c3b4058b..4b9987cf6 100644
--- a/runsc/cmd/BUILD
+++ b/runsc/cmd/BUILD
@@ -35,6 +35,7 @@ go_library(
"statefile.go",
"symbolize.go",
"syscalls.go",
+ "verity_prepare.go",
"wait.go",
],
visibility = [
diff --git a/runsc/cmd/do.go b/runsc/cmd/do.go
index 455c57692..5485db149 100644
--- a/runsc/cmd/do.go
+++ b/runsc/cmd/do.go
@@ -126,9 +126,8 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su
Hostname: hostname,
}
- specutils.LogSpec(spec)
-
cid := fmt.Sprintf("runsc-%06d", rand.Int31n(1000000))
+
if conf.Network == config.NetworkNone {
addNamespace(spec, specs.LinuxNamespace{Type: specs.NetworkNamespace})
@@ -154,55 +153,7 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su
}
}
- out, err := json.Marshal(spec)
- if err != nil {
- return Errorf("Error to marshal spec: %v", err)
- }
- tmpDir, err := ioutil.TempDir("", "runsc-do")
- if err != nil {
- return Errorf("Error to create tmp dir: %v", err)
- }
- defer os.RemoveAll(tmpDir)
-
- log.Infof("Changing configuration RootDir to %q", tmpDir)
- conf.RootDir = tmpDir
-
- cfgPath := filepath.Join(tmpDir, "config.json")
- if err := ioutil.WriteFile(cfgPath, out, 0755); err != nil {
- return Errorf("Error write spec: %v", err)
- }
-
- containerArgs := container.Args{
- ID: cid,
- Spec: spec,
- BundleDir: tmpDir,
- Attached: true,
- }
- ct, err := container.New(conf, containerArgs)
- if err != nil {
- return Errorf("creating container: %v", err)
- }
- defer ct.Destroy()
-
- if err := ct.Start(conf); err != nil {
- return Errorf("starting container: %v", err)
- }
-
- // Forward signals to init in the container. Thus if we get SIGINT from
- // ^C, the container gracefully exit, and we can clean up.
- //
- // N.B. There is a still a window before this where a signal may kill
- // this process, skipping cleanup.
- stopForwarding := ct.ForwardSignals(0 /* pid */, false /* fgProcess */)
- defer stopForwarding()
-
- ws, err := ct.Wait()
- if err != nil {
- return Errorf("waiting for container: %v", err)
- }
-
- *waitStatus = ws
- return subcommands.ExitSuccess
+ return startContainerAndWait(spec, conf, cid, waitStatus)
}
func addNamespace(spec *specs.Spec, ns specs.LinuxNamespace) {
@@ -397,3 +348,58 @@ func calculatePeerIP(ip string) (string, error) {
}
return fmt.Sprintf("%s.%s.%s.%d", parts[0], parts[1], parts[2], n), nil
}
+
+func startContainerAndWait(spec *specs.Spec, conf *config.Config, cid string, waitStatus *unix.WaitStatus) subcommands.ExitStatus {
+ specutils.LogSpec(spec)
+
+ out, err := json.Marshal(spec)
+ if err != nil {
+ return Errorf("Error to marshal spec: %v", err)
+ }
+ tmpDir, err := ioutil.TempDir("", "runsc-do")
+ if err != nil {
+ return Errorf("Error to create tmp dir: %v", err)
+ }
+ defer os.RemoveAll(tmpDir)
+
+ log.Infof("Changing configuration RootDir to %q", tmpDir)
+ conf.RootDir = tmpDir
+
+ cfgPath := filepath.Join(tmpDir, "config.json")
+ if err := ioutil.WriteFile(cfgPath, out, 0755); err != nil {
+ return Errorf("Error write spec: %v", err)
+ }
+
+ containerArgs := container.Args{
+ ID: cid,
+ Spec: spec,
+ BundleDir: tmpDir,
+ Attached: true,
+ }
+
+ ct, err := container.New(conf, containerArgs)
+ if err != nil {
+ return Errorf("creating container: %v", err)
+ }
+ defer ct.Destroy()
+
+ if err := ct.Start(conf); err != nil {
+ return Errorf("starting container: %v", err)
+ }
+
+ // Forward signals to init in the container. Thus if we get SIGINT from
+ // ^C, the container gracefully exits, and we can clean up.
+ //
+ // N.B. There is still a window before this where a signal may kill
+ // this process, skipping cleanup.
+ stopForwarding := ct.ForwardSignals(0 /* pid */, false /* fgProcess */)
+ defer stopForwarding()
+
+ ws, err := ct.Wait()
+ if err != nil {
+ return Errorf("waiting for container: %v", err)
+ }
+
+ *waitStatus = ws
+ return subcommands.ExitSuccess
+}
diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go
index 4cb0164dd..6a755ecb6 100644
--- a/runsc/cmd/gofer.go
+++ b/runsc/cmd/gofer.go
@@ -176,7 +176,7 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
mountIdx := 1 // first one is the root
for _, m := range spec.Mounts {
- if specutils.Is9PMount(m) {
+ if specutils.Is9PMount(m, conf.VFS2) {
cfg := fsgofer.Config{
ROMount: isReadonlyMount(m.Options) || conf.Overlay,
HostUDS: conf.FSGoferHostUDS,
@@ -350,7 +350,7 @@ func setupRootFS(spec *specs.Spec, conf *config.Config) error {
// creates directories as needed.
func setupMounts(conf *config.Config, mounts []specs.Mount, root string) error {
for _, m := range mounts {
- if m.Type != "bind" || !specutils.IsVFS1SupportedDevMount(m) {
+ if !specutils.Is9PMount(m, conf.VFS2) {
continue
}
@@ -390,7 +390,7 @@ func setupMounts(conf *config.Config, mounts []specs.Mount, root string) error {
func resolveMounts(conf *config.Config, mounts []specs.Mount, root string) ([]specs.Mount, error) {
cleanMounts := make([]specs.Mount, 0, len(mounts))
for _, m := range mounts {
- if m.Type != "bind" || !specutils.IsVFS1SupportedDevMount(m) {
+ if !specutils.Is9PMount(m, conf.VFS2) {
cleanMounts = append(cleanMounts, m)
continue
}
diff --git a/runsc/cmd/verity_prepare.go b/runsc/cmd/verity_prepare.go
new file mode 100644
index 000000000..66128b2a3
--- /dev/null
+++ b/runsc/cmd/verity_prepare.go
@@ -0,0 +1,108 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cmd
+
+import (
+ "context"
+ "fmt"
+ "math/rand"
+ "os"
+
+ "github.com/google/subcommands"
+ specs "github.com/opencontainers/runtime-spec/specs-go"
+ "golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/runsc/config"
+ "gvisor.dev/gvisor/runsc/flag"
+ "gvisor.dev/gvisor/runsc/specutils"
+)
+
+// VerityPrepare implements subcommands.Command for the "verity-prepare"
+// command. It sets up a sandbox with a writable verity mount mapped to "--dir",
+// and executes the verity measure tool specified by "--tool" in the sandbox. It
+// is intended to prepare --dir to be mounted as a verity filesystem.
+type VerityPrepare struct {
+ root string
+ tool string
+ dir string
+}
+
+// Name implements subcommands.Command.Name.
+func (*VerityPrepare) Name() string {
+ return "verity-prepare"
+}
+
+// Synopsis implements subcommands.Command.Synopsis.
+func (*VerityPrepare) Synopsis() string {
+ return "Generates the data structures necessary to enable verityfs on a filesystem."
+}
+
+// Usage implements subcommands.Command.Usage.
+func (*VerityPrepare) Usage() string {
+ return "verity-prepare --tool=<measure_tool> --dir=<path>"
+}
+
+// SetFlags implements subcommands.Command.SetFlags.
+func (c *VerityPrepare) SetFlags(f *flag.FlagSet) {
+ f.StringVar(&c.root, "root", "/", `path to the root directory, defaults to "/"`)
+ f.StringVar(&c.tool, "tool", "", "path to the verity measure_tool")
+ f.StringVar(&c.dir, "dir", "", "path to the directory to be hashed")
+}
+
+// Execute implements subcommands.Command.Execute.
+func (c *VerityPrepare) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
+ conf := args[0].(*config.Config)
+ waitStatus := args[1].(*unix.WaitStatus)
+
+ hostname, err := os.Hostname()
+ if err != nil {
+ return Errorf("Error to retrieve hostname: %v", err)
+ }
+
+ // Map the entire host file system.
+ absRoot, err := resolvePath(c.root)
+ if err != nil {
+ return Errorf("Error resolving root: %v", err)
+ }
+
+ spec := &specs.Spec{
+ Root: &specs.Root{
+ Path: absRoot,
+ },
+ Process: &specs.Process{
+ Cwd: absRoot,
+ Args: []string{c.tool, "--path", "/verityroot"},
+ Env: os.Environ(),
+ Capabilities: specutils.AllCapabilities(),
+ },
+ Hostname: hostname,
+ Mounts: []specs.Mount{
+ specs.Mount{
+ Source: c.dir,
+ Destination: "/verityroot",
+ Type: "bind",
+ Options: []string{"verity.roothash="},
+ },
+ },
+ }
+
+ cid := fmt.Sprintf("runsc-%06d", rand.Int31n(1000000))
+
+ // Force no networking, it is not necessary to run the verity measure tool.
+ conf.Network = config.NetworkNone
+
+ conf.Verity = true
+
+ return startContainerAndWait(spec, conf, cid, waitStatus)
+}
diff --git a/runsc/config/config.go b/runsc/config/config.go
index 1e5858837..0b2b97cc5 100644
--- a/runsc/config/config.go
+++ b/runsc/config/config.go
@@ -172,6 +172,9 @@ type Config struct {
// Enables seccomp inside the sandbox.
OCISeccomp bool `flag:"oci-seccomp"`
+ // Mounts the cgroup filesystem backed by the sentry's cgroupfs.
+ Cgroupfs bool `flag:"cgroupfs"`
+
// TestOnlyAllowRunAsCurrentUserWithoutChroot should only be used in
// tests. It allows runsc to start the sandbox process as the current
// user, and without chrooting the sandbox process. This can be
diff --git a/runsc/config/flags.go b/runsc/config/flags.go
index 1d996c841..13a1a0163 100644
--- a/runsc/config/flags.go
+++ b/runsc/config/flags.go
@@ -75,6 +75,7 @@ func RegisterFlags() {
flag.Bool("fsgofer-host-uds", false, "allow the gofer to mount Unix Domain Sockets.")
flag.Bool("vfs2", false, "enables VFSv2. This uses the new VFS layer that is faster than the previous one.")
flag.Bool("fuse", false, "TEST ONLY; use while FUSE in VFSv2 is landing. This allows the use of the new experimental FUSE filesystem.")
+ flag.Bool("cgroupfs", false, "Automatically mount cgroupfs.")
// Flags that control sandbox runtime behavior: network related.
flag.Var(networkTypePtr(NetworkSandbox), "network", "specifies which network to use: sandbox (default), host, none. Using network inside the sandbox is more secure because it's isolated from the host network.")
diff --git a/runsc/container/container.go b/runsc/container/container.go
index f9d83c118..e72ada311 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -886,7 +886,7 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *config.Config, bu
// Add root mount and then add any other additional mounts.
mountCount := 1
for _, m := range spec.Mounts {
- if specutils.Is9PMount(m) {
+ if specutils.Is9PMount(m, conf.VFS2) {
mountCount++
}
}
diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go
index 450f92645..47da2dd10 100644
--- a/runsc/sandbox/sandbox.go
+++ b/runsc/sandbox/sandbox.go
@@ -486,7 +486,7 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn
}
if deviceFile, err := gPlatform.OpenDevice(); err != nil {
- return fmt.Errorf("opening device file for platform %q: %v", gPlatform, err)
+ return fmt.Errorf("opening device file for platform %q: %v", conf.Platform, err)
} else if deviceFile != nil {
defer deviceFile.Close()
cmd.ExtraFiles = append(cmd.ExtraFiles, deviceFile)
@@ -1174,7 +1174,7 @@ func deviceFileForPlatform(name string) (*os.File, error) {
f, err := p.OpenDevice()
if err != nil {
- return nil, fmt.Errorf("opening device file for platform %q: %v", p, err)
+ return nil, fmt.Errorf("opening device file for platform %q: %w", name, err)
}
return f, nil
}
diff --git a/runsc/specutils/fs.go b/runsc/specutils/fs.go
index b62504a8c..9ecd0fde6 100644
--- a/runsc/specutils/fs.go
+++ b/runsc/specutils/fs.go
@@ -18,6 +18,7 @@ import (
"fmt"
"math/bits"
"path"
+ "strings"
specs "github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/sys/unix"
@@ -64,6 +65,12 @@ var optionsMap = map[string]mapping{
"sync": {set: true, val: unix.MS_SYNCHRONOUS},
}
+// verityMountOptions is the set of valid verity mount option keys.
+var verityMountOptions = map[string]struct{}{
+ "verity.roothash": struct{}{},
+ "verity.action": struct{}{},
+}
+
// propOptionsMap is similar to optionsMap, but it lists propagation options
// that cannot be used together with other flags.
var propOptionsMap = map[string]mapping{
@@ -117,6 +124,14 @@ func validateMount(mnt *specs.Mount) error {
return nil
}
+func moptKey(opt string) string {
+ if len(opt) == 0 {
+ return opt
+ }
+ // Guaranteed to have at least one token, since opt is not empty.
+ return strings.SplitN(opt, "=", 2)[0]
+}
+
// ValidateMountOptions validates that mount options are correct.
func ValidateMountOptions(opts []string) error {
for _, o := range opts {
@@ -125,7 +140,8 @@ func ValidateMountOptions(opts []string) error {
}
_, ok1 := optionsMap[o]
_, ok2 := propOptionsMap[o]
- if !ok1 && !ok2 {
+ _, ok3 := verityMountOptions[moptKey(o)]
+ if !ok1 && !ok2 && !ok3 {
return fmt.Errorf("unknown mount option %q", o)
}
if err := validatePropagation(o); err != nil {
diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go
index 45856fd58..e5e66546c 100644
--- a/runsc/specutils/specutils.go
+++ b/runsc/specutils/specutils.go
@@ -332,14 +332,20 @@ func capsFromNames(names []string, skipSet map[linux.Capability]struct{}) (auth.
return auth.CapabilitySetOfMany(caps), nil
}
-// Is9PMount returns true if the given mount can be mounted as an external gofer.
-func Is9PMount(m specs.Mount) bool {
- return m.Type == "bind" && m.Source != "" && IsVFS1SupportedDevMount(m)
+// Is9PMount returns true if the given mount can be mounted as an external
+// gofer.
+func Is9PMount(m specs.Mount, vfs2Enabled bool) bool {
+ return m.Type == "bind" && m.Source != "" && IsSupportedDevMount(m, vfs2Enabled)
}
-// IsVFS1SupportedDevMount returns true if m.Destination does not specify a
+// IsSupportedDevMount returns true if m.Destination does not specify a
// path that is hardcoded by VFS1's implementation of /dev.
-func IsVFS1SupportedDevMount(m specs.Mount) bool {
+func IsSupportedDevMount(m specs.Mount, vfs2Enabled bool) bool {
+ // VFS2 has no hardcoded files under /dev, so everything is allowed.
+ if vfs2Enabled {
+ return true
+ }
+
// See pkg/sentry/fs/dev/dev.go.
var existingDevices = []string{
"/dev/fd", "/dev/stdin", "/dev/stdout", "/dev/stderr",
diff --git a/shim/BUILD b/shim/BUILD
index 434269d31..695f61eb9 100644
--- a/shim/BUILD
+++ b/shim/BUILD
@@ -6,6 +6,7 @@ go_binary(
name = "containerd-shim-runsc-v1",
srcs = ["main.go"],
static = True,
+ tags = ["staging"],
visibility = [
"//visibility:public",
],
diff --git a/test/benchmarks/base/BUILD b/test/benchmarks/base/BUILD
index 697ab5837..a5a3cf2c1 100644
--- a/test/benchmarks/base/BUILD
+++ b/test/benchmarks/base/BUILD
@@ -17,7 +17,6 @@ go_library(
benchmark_test(
name = "startup_test",
- size = "enormous",
srcs = ["startup_test.go"],
visibility = ["//:sandbox"],
deps = [
@@ -29,7 +28,6 @@ benchmark_test(
benchmark_test(
name = "size_test",
- size = "enormous",
srcs = ["size_test.go"],
visibility = ["//:sandbox"],
deps = [
@@ -42,7 +40,6 @@ benchmark_test(
benchmark_test(
name = "sysbench_test",
- size = "enormous",
srcs = ["sysbench_test.go"],
visibility = ["//:sandbox"],
deps = [
diff --git a/test/benchmarks/database/BUILD b/test/benchmarks/database/BUILD
index 0b1743603..fee2695ff 100644
--- a/test/benchmarks/database/BUILD
+++ b/test/benchmarks/database/BUILD
@@ -11,7 +11,6 @@ go_library(
benchmark_test(
name = "redis_test",
- size = "enormous",
srcs = ["redis_test.go"],
library = ":database",
visibility = ["//:sandbox"],
diff --git a/test/benchmarks/fs/BUILD b/test/benchmarks/fs/BUILD
index dc82e63b2..c2b981a07 100644
--- a/test/benchmarks/fs/BUILD
+++ b/test/benchmarks/fs/BUILD
@@ -4,7 +4,6 @@ package(licenses = ["notice"])
benchmark_test(
name = "bazel_test",
- size = "enormous",
srcs = ["bazel_test.go"],
visibility = ["//:sandbox"],
deps = [
@@ -18,7 +17,6 @@ benchmark_test(
benchmark_test(
name = "fio_test",
- size = "enormous",
srcs = ["fio_test.go"],
visibility = ["//:sandbox"],
deps = [
diff --git a/test/benchmarks/media/BUILD b/test/benchmarks/media/BUILD
index 380783f0b..ad2ef3a55 100644
--- a/test/benchmarks/media/BUILD
+++ b/test/benchmarks/media/BUILD
@@ -11,7 +11,6 @@ go_library(
benchmark_test(
name = "ffmpeg_test",
- size = "enormous",
srcs = ["ffmpeg_test.go"],
library = ":media",
visibility = ["//:sandbox"],
diff --git a/test/benchmarks/ml/BUILD b/test/benchmarks/ml/BUILD
index 3425b8dad..56a4d4f39 100644
--- a/test/benchmarks/ml/BUILD
+++ b/test/benchmarks/ml/BUILD
@@ -11,7 +11,6 @@ go_library(
benchmark_test(
name = "tensorflow_test",
- size = "enormous",
srcs = ["tensorflow_test.go"],
library = ":ml",
visibility = ["//:sandbox"],
diff --git a/test/benchmarks/network/BUILD b/test/benchmarks/network/BUILD
index 2741570f5..e047020bf 100644
--- a/test/benchmarks/network/BUILD
+++ b/test/benchmarks/network/BUILD
@@ -18,7 +18,6 @@ go_library(
benchmark_test(
name = "iperf_test",
- size = "enormous",
srcs = [
"iperf_test.go",
],
@@ -34,7 +33,6 @@ benchmark_test(
benchmark_test(
name = "node_test",
- size = "enormous",
srcs = [
"node_test.go",
],
@@ -49,7 +47,6 @@ benchmark_test(
benchmark_test(
name = "ruby_test",
- size = "enormous",
srcs = [
"ruby_test.go",
],
@@ -64,7 +61,6 @@ benchmark_test(
benchmark_test(
name = "nginx_test",
- size = "enormous",
srcs = [
"nginx_test.go",
],
@@ -79,7 +75,6 @@ benchmark_test(
benchmark_test(
name = "httpd_test",
- size = "enormous",
srcs = [
"httpd_test.go",
],
diff --git a/test/e2e/BUILD b/test/e2e/BUILD
index 29a84f184..3b3dadf04 100644
--- a/test/e2e/BUILD
+++ b/test/e2e/BUILD
@@ -8,7 +8,6 @@ go_test(
srcs = [
"exec_test.go",
"integration_test.go",
- "regression_test.go",
],
library = ":integration",
tags = [
diff --git a/test/e2e/integration_test.go b/test/e2e/integration_test.go
index 49cd74887..1accc3b3b 100644
--- a/test/e2e/integration_test.go
+++ b/test/e2e/integration_test.go
@@ -168,13 +168,6 @@ func TestCheckpointRestore(t *testing.T) {
t.Skip("Pause/resume is not supported.")
}
- // TODO(gvisor.dev/issue/3373): Remove after implementing.
- if usingVFS2, err := dockerutil.UsingVFS2(); usingVFS2 {
- t.Skip("CheckpointRestore not implemented in VFS2.")
- } else if err != nil {
- t.Fatalf("failed to read config for runtime %s: %v", dockerutil.Runtime(), err)
- }
-
ctx := context.Background()
d := dockerutil.MakeContainer(ctx, t)
defer d.CleanUp(ctx)
@@ -399,15 +392,15 @@ func TestTmpFile(t *testing.T) {
// TestTmpMount checks that mounts inside '/tmp' are not overridden.
func TestTmpMount(t *testing.T) {
- ctx := context.Background()
dir, err := ioutil.TempDir(testutil.TmpDir(), "tmp-mount")
if err != nil {
t.Fatalf("TempDir(): %v", err)
}
- want := "123"
+ const want = "123"
if err := ioutil.WriteFile(filepath.Join(dir, "file.txt"), []byte("123"), 0666); err != nil {
t.Fatalf("WriteFile(): %v", err)
}
+ ctx := context.Background()
d := dockerutil.MakeContainer(ctx, t)
defer d.CleanUp(ctx)
@@ -430,6 +423,48 @@ func TestTmpMount(t *testing.T) {
}
}
+// TestMountOverDev checks that a file can be bind-mounted on top of an
+// existing /dev entry, e.g. /dev/random. It is skipped when VFS2 is not in use.
+func TestMountOverDev(t *testing.T) {
+ if usingVFS2, err := dockerutil.UsingVFS2(); err != nil { // err first: a failed lookup must not look like a skip.
+ t.Fatalf("Failed to read config for runtime %s: %v", dockerutil.Runtime(), err)
+ } else if !usingVFS2 {
+ t.Skip("VFS1 doesn't allow /dev/random to be mounted.")
+ }
+
+ random, err := ioutil.TempFile(testutil.TmpDir(), "random")
+ if err != nil {
+ t.Fatal("ioutil.TempFile() failed:", err)
+ }
+ const want = "123"
+ if _, err := random.WriteString(want); err != nil {
+ t.Fatalf("WriteString() to %q: %v", random.Name(), err)
+ }
+
+ ctx := context.Background()
+ d := dockerutil.MakeContainer(ctx, t)
+ defer d.CleanUp(ctx)
+
+ opts := dockerutil.RunOpts{
+ Image: "basic/alpine",
+ Mounts: []mount.Mount{
+ {
+ Type: mount.TypeBind,
+ Source: random.Name(),
+ Target: "/dev/random",
+ },
+ },
+ }
+ cmd := "dd count=1 bs=5 if=/dev/random 2> /dev/null"
+ got, err := d.Run(ctx, opts, "sh", "-c", cmd)
+ if err != nil {
+ t.Fatalf("docker run failed: %v", err)
+ }
+ if want != got {
+ t.Errorf("invalid file content, want: %q, got: %q", want, got)
+ }
+}
+
// TestSyntheticDirs checks that submounts can be created inside a readonly
// mount even if the target path does not exist.
func TestSyntheticDirs(t *testing.T) {
@@ -550,6 +585,30 @@ func runIntegrationTest(t *testing.T, capAdd []string, args ...string) {
}
}
+// Test that UDS can be created using overlay when parent directory is in lower
+// layer only (b/134090485).
+//
+// Prerequisite: the directory where the socket file is created must not have
+// been open for write before bind(2) is called.
+func TestBindOverlay(t *testing.T) {
+ ctx := context.Background()
+ d := dockerutil.MakeContainer(ctx, t)
+ defer d.CleanUp(ctx)
+
+ // Run the container.
+ got, err := d.Run(ctx, dockerutil.RunOpts{
+ Image: "basic/ubuntu",
+ }, "bash", "-c", "nc -q -1 -l -U /var/run/sock & p=$! && sleep 1 && echo foobar-asdf | nc -q 0 -U /var/run/sock && wait $p")
+ if err != nil {
+ t.Fatalf("docker run failed: %v", err)
+ }
+
+ // Check the output contains what we want.
+ if want := "foobar-asdf"; !strings.Contains(got, want) {
+ t.Fatalf("docker run output is missing %q: %s", want, got)
+ }
+}
+
func TestMain(m *testing.M) {
dockerutil.EnsureSupportedDockerVersion()
flag.Parse()
diff --git a/test/e2e/regression_test.go b/test/e2e/regression_test.go
deleted file mode 100644
index 84564cdaa..000000000
--- a/test/e2e/regression_test.go
+++ /dev/null
@@ -1,47 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package integration
-
-import (
- "context"
- "strings"
- "testing"
-
- "gvisor.dev/gvisor/pkg/test/dockerutil"
-)
-
-// Test that UDS can be created using overlay when parent directory is in lower
-// layer only (b/134090485).
-//
-// Prerequisite: the directory where the socket file is created must not have
-// been open for write before bind(2) is called.
-func TestBindOverlay(t *testing.T) {
- ctx := context.Background()
- d := dockerutil.MakeContainer(ctx, t)
- defer d.CleanUp(ctx)
-
- // Run the container.
- got, err := d.Run(ctx, dockerutil.RunOpts{
- Image: "basic/ubuntu",
- }, "bash", "-c", "nc -q -1 -l -U /var/run/sock & p=$! && sleep 1 && echo foobar-asdf | nc -q 0 -U /var/run/sock && wait $p")
- if err != nil {
- t.Fatalf("docker run failed: %v", err)
- }
-
- // Check the output contains what we want.
- if want := "foobar-asdf"; !strings.Contains(got, want) {
- t.Fatalf("docker run output is missing %q: %s", want, got)
- }
-}
diff --git a/test/fsstress/BUILD b/test/fsstress/BUILD
index d262c8554..e74e7fff2 100644
--- a/test/fsstress/BUILD
+++ b/test/fsstress/BUILD
@@ -14,9 +14,7 @@ go_test(
"manual",
"local",
],
- deps = [
- "//pkg/test/dockerutil",
- ],
+ deps = ["//pkg/test/dockerutil"],
)
go_library(
diff --git a/test/fsstress/fsstress_test.go b/test/fsstress/fsstress_test.go
index 300c21ceb..d53c8f90d 100644
--- a/test/fsstress/fsstress_test.go
+++ b/test/fsstress/fsstress_test.go
@@ -17,7 +17,9 @@ package fsstress
import (
"context"
+ "flag"
"math/rand"
+ "os"
"strconv"
"strings"
"testing"
@@ -30,33 +32,44 @@ func init() {
rand.Seed(int64(time.Now().Nanosecond()))
}
-func fsstress(t *testing.T, dir string) {
+func TestMain(m *testing.M) {
+ dockerutil.EnsureSupportedDockerVersion()
+ flag.Parse()
+ os.Exit(m.Run())
+}
+
+type config struct {
+ operations string
+ processes string
+ target string
+}
+
+func fsstress(t *testing.T, conf config) {
ctx := context.Background()
d := dockerutil.MakeContainer(ctx, t)
defer d.CleanUp(ctx)
- const (
- operations = "10000"
- processes = "100"
- image = "basic/fsstress"
- )
+ const image = "basic/fsstress"
seed := strconv.FormatUint(uint64(rand.Uint32()), 10)
- args := []string{"-d", dir, "-n", operations, "-p", processes, "-s", seed, "-X"}
- t.Logf("Repro: docker run --rm --runtime=runsc %s %s", image, strings.Join(args, ""))
+ args := []string{"-d", conf.target, "-n", conf.operations, "-p", conf.processes, "-s", seed, "-X"}
+ t.Logf("Repro: docker run --rm --runtime=%s gvisor.dev/images/%s %s", dockerutil.Runtime(), image, strings.Join(args, " "))
out, err := d.Run(ctx, dockerutil.RunOpts{Image: image}, args...)
if err != nil {
t.Fatalf("docker run failed: %v\noutput: %s", err, out)
}
- lines := strings.SplitN(out, "\n", 2)
- if len(lines) > 1 || !strings.HasPrefix(out, "seed =") {
+ // This is to catch cases where fsstress spews out error messages during clean
+ // up but doesn't return error.
+ if len(out) > 0 {
t.Fatalf("unexpected output: %s", out)
}
}
-func TestFsstressGofer(t *testing.T) {
- fsstress(t, "/test")
-}
-
func TestFsstressTmpfs(t *testing.T) {
- fsstress(t, "/tmp")
+ // This takes about 10s to run on my machine. Adjust as needed.
+ cfg := config{
+ operations: "5000",
+ processes: "20",
+ target: "/tmp",
+ }
+ fsstress(t, cfg)
}
diff --git a/test/image/image_test.go b/test/image/image_test.go
index 968e62f63..952264173 100644
--- a/test/image/image_test.go
+++ b/test/image/image_test.go
@@ -183,7 +183,10 @@ func TestMysql(t *testing.T) {
// Start the container.
if err := server.Spawn(ctx, dockerutil.RunOpts{
Image: "basic/mysql",
- Env: []string{"MYSQL_ROOT_PASSWORD=foobar123"},
+ Env: []string{
+ "MYSQL_ROOT_PASSWORD=foobar123",
+ "MYSQL_ROOT_HOST=%", // Allow anyone to connect to the server.
+ },
}); err != nil {
t.Fatalf("docker run failed: %v", err)
}
diff --git a/test/iptables/BUILD b/test/iptables/BUILD
index 94d4ca2d4..9805665ac 100644
--- a/test/iptables/BUILD
+++ b/test/iptables/BUILD
@@ -16,8 +16,8 @@ go_library(
visibility = ["//test/iptables:__subpackages__"],
deps = [
"//pkg/binary",
+ "//pkg/hostarch",
"//pkg/test/testutil",
- "//pkg/usermem",
"@org_golang_x_sys//unix:go_default_library",
],
)
diff --git a/test/iptables/nat.go b/test/iptables/nat.go
index 70d8a1832..0776639a7 100644
--- a/test/iptables/nat.go
+++ b/test/iptables/nat.go
@@ -22,7 +22,7 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/binary"
- "gvisor.dev/gvisor/pkg/usermem"
+ "gvisor.dev/gvisor/pkg/hostarch"
)
const redirectPort = 42
@@ -848,7 +848,7 @@ func recvOrigDstAddr4(sockfd int) (unix.RawSockaddrInet4, error) {
return unix.RawSockaddrInet4{}, err
}
var addr unix.RawSockaddrInet4
- binary.Unmarshal(buf, usermem.ByteOrder, &addr)
+ binary.Unmarshal(buf, hostarch.ByteOrder, &addr)
return addr, nil
}
@@ -858,7 +858,7 @@ func recvOrigDstAddr6(sockfd int) (unix.RawSockaddrInet6, error) {
return unix.RawSockaddrInet6{}, err
}
var addr unix.RawSockaddrInet6
- binary.Unmarshal(buf, usermem.ByteOrder, &addr)
+ binary.Unmarshal(buf, hostarch.ByteOrder, &addr)
return addr, nil
}
diff --git a/test/packetimpact/runner/defs.bzl b/test/packetimpact/runner/defs.bzl
index 34e83ec49..634c15727 100644
--- a/test/packetimpact/runner/defs.bzl
+++ b/test/packetimpact/runner/defs.bzl
@@ -246,6 +246,12 @@ ALL_TESTS = [
expect_netstack_failure = True,
),
PacketimpactTestInfo(
+ name = "tcp_listen_backlog",
+ ),
+ PacketimpactTestInfo(
+ name = "tcp_syncookie",
+ ),
+ PacketimpactTestInfo(
name = "icmpv6_param_problem",
),
PacketimpactTestInfo(
diff --git a/test/packetimpact/testbench/BUILD b/test/packetimpact/testbench/BUILD
index 43b4c7ca1..616215dc3 100644
--- a/test/packetimpact/testbench/BUILD
+++ b/test/packetimpact/testbench/BUILD
@@ -16,11 +16,11 @@ go_library(
],
visibility = ["//test/packetimpact:__subpackages__"],
deps = [
+ "//pkg/hostarch",
"//pkg/tcpip",
"//pkg/tcpip/buffer",
"//pkg/tcpip/header",
"//pkg/tcpip/seqnum",
- "//pkg/usermem",
"//test/packetimpact/proto:posix_server_go_proto",
"@com_github_google_go_cmp//cmp:go_default_library",
"@com_github_google_go_cmp//cmp/cmpopts:go_default_library",
diff --git a/test/packetimpact/testbench/rawsockets.go b/test/packetimpact/testbench/rawsockets.go
index 1ac96626a..feeb0888a 100644
--- a/test/packetimpact/testbench/rawsockets.go
+++ b/test/packetimpact/testbench/rawsockets.go
@@ -23,7 +23,7 @@ import (
"time"
"golang.org/x/sys/unix"
- "gvisor.dev/gvisor/pkg/usermem"
+ "gvisor.dev/gvisor/pkg/hostarch"
)
// Sniffer can sniff raw packets on the wire.
@@ -34,7 +34,7 @@ type Sniffer struct {
func htons(x uint16) uint16 {
buf := [2]byte{}
binary.BigEndian.PutUint16(buf[:], x)
- return usermem.ByteOrder.Uint16(buf[:])
+ return hostarch.ByteOrder.Uint16(buf[:])
}
// NewSniffer creates a Sniffer connected to *device.
diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD
index c0deb33e5..83ff70951 100644
--- a/test/packetimpact/tests/BUILD
+++ b/test/packetimpact/tests/BUILD
@@ -105,8 +105,8 @@ packetimpact_testbench(
deps = [
"//pkg/abi/linux",
"//pkg/binary",
+ "//pkg/hostarch",
"//pkg/tcpip/header",
- "//pkg/usermem",
"//test/packetimpact/testbench",
"@org_golang_x_sys//unix:go_default_library",
],
@@ -354,9 +354,9 @@ packetimpact_testbench(
deps = [
"//pkg/abi/linux",
"//pkg/binary",
+ "//pkg/hostarch",
"//pkg/tcpip/header",
"//pkg/tcpip/seqnum",
- "//pkg/usermem",
"//test/packetimpact/testbench",
"@org_golang_x_sys//unix:go_default_library",
],
@@ -368,8 +368,8 @@ packetimpact_testbench(
deps = [
"//pkg/abi/linux",
"//pkg/binary",
+ "//pkg/hostarch",
"//pkg/tcpip/header",
- "//pkg/usermem",
"//test/packetimpact/testbench",
"@org_golang_x_sys//unix:go_default_library",
],
@@ -385,6 +385,26 @@ packetimpact_testbench(
],
)
+packetimpact_testbench(
+ name = "tcp_listen_backlog",
+ srcs = ["tcp_listen_backlog_test.go"],
+ deps = [
+ "//pkg/tcpip/header",
+ "//test/packetimpact/testbench",
+ "@org_golang_x_sys//unix:go_default_library",
+ ],
+)
+
+packetimpact_testbench(
+ name = "tcp_syncookie",
+ srcs = ["tcp_syncookie_test.go"],
+ deps = [
+ "//pkg/tcpip/header",
+ "//test/packetimpact/testbench",
+ "@org_golang_x_sys//unix:go_default_library",
+ ],
+)
+
validate_all_tests()
[packetimpact_go_test(
diff --git a/test/packetimpact/tests/tcp_info_test.go b/test/packetimpact/tests/tcp_info_test.go
index 3fc2c7fe5..93f58ec49 100644
--- a/test/packetimpact/tests/tcp_info_test.go
+++ b/test/packetimpact/tests/tcp_info_test.go
@@ -22,8 +22,8 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/tcpip/header"
- "gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/test/packetimpact/testbench"
)
@@ -58,7 +58,7 @@ func TestTCPInfo(t *testing.T) {
if got, want := len(infoBytes), linux.SizeOfTCPInfo; got != want {
t.Fatalf("expected %T, got %d bytes want %d bytes", info, got, want)
}
- binary.Unmarshal(infoBytes, usermem.ByteOrder, &info)
+ binary.Unmarshal(infoBytes, hostarch.ByteOrder, &info)
rtt := time.Duration(info.RTT) * time.Microsecond
rttvar := time.Duration(info.RTTVar) * time.Microsecond
@@ -99,7 +99,7 @@ func TestTCPInfo(t *testing.T) {
if got, want := len(infoBytes), linux.SizeOfTCPInfo; got != want {
t.Fatalf("expected %T, got %d bytes want %d bytes", info, got, want)
}
- binary.Unmarshal(infoBytes, usermem.ByteOrder, &info)
+ binary.Unmarshal(infoBytes, hostarch.ByteOrder, &info)
if info.CaState != linux.TCP_CA_Loss {
t.Errorf("expected the connection to be in loss recovery, got: %v want: %v", info.CaState, linux.TCP_CA_Loss)
}
diff --git a/test/packetimpact/tests/tcp_listen_backlog_test.go b/test/packetimpact/tests/tcp_listen_backlog_test.go
new file mode 100644
index 000000000..26c812d0a
--- /dev/null
+++ b/test/packetimpact/tests/tcp_listen_backlog_test.go
@@ -0,0 +1,86 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tcp_listen_backlog_test
+
+import (
+ "flag"
+ "testing"
+ "time"
+
+ "golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/test/packetimpact/testbench"
+)
+
+func init() {
+ testbench.Initialize(flag.CommandLine)
+}
+
+// TestTCPListenBacklog tests for a listening endpoint behavior:
+// (1) reply to more SYNs than what is configured as listen backlog
+// (2) ignore ACKs (that complete a handshake) when the accept queue is full
+// (3) ignore incoming SYNs when the accept queue is full
+func TestTCPListenBacklog(t *testing.T) {
+ dut := testbench.NewDUT(t)
+
+ // Listening endpoint accepts one more connection than the listen backlog.
+ listenFd, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 0 /*backlog*/)
+
+ var establishedConn testbench.TCPIPv4
+ var incompleteConn testbench.TCPIPv4
+
+ // Test if the DUT listener replies to more SYNs than listen backlog+1
+ for i, conn := range []*testbench.TCPIPv4{&establishedConn, &incompleteConn} {
+ *conn = dut.Net.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
+ // Expect dut connection to have transitioned to SYN-RCVD state.
+ conn.Send(t, testbench.TCP{Flags: testbench.TCPFlags(header.TCPFlagSyn)})
+ if _, err := conn.ExpectData(t, &testbench.TCP{Flags: testbench.TCPFlags(header.TCPFlagSyn | header.TCPFlagAck)}, nil, time.Second); err != nil {
+ t.Fatalf("expected SYN-ACK for %d connection, %s", i, err)
+ }
+ }
+ defer establishedConn.Close(t)
+ defer incompleteConn.Close(t)
+
+ // Send the ACK to complete handshake.
+ establishedConn.Send(t, testbench.TCP{Flags: testbench.TCPFlags(header.TCPFlagAck)})
+ dut.PollOne(t, listenFd, unix.POLLIN, time.Second)
+
+ // Send the ACK to complete handshake, expect this to be ignored by the
+ // listener.
+ incompleteConn.Send(t, testbench.TCP{Flags: testbench.TCPFlags(header.TCPFlagAck)})
+
+ // Drain the accept queue to enable poll for subsequent connections on the
+ // listener.
+ dut.Accept(t, listenFd)
+
+ // The ACK for the incomplete connection should be ignored by the
+ // listening endpoint and the poll on listener should now time out.
+ if pfds := dut.Poll(t, []unix.PollFd{{Fd: listenFd, Events: unix.POLLIN}}, time.Second); len(pfds) != 0 {
+ t.Fatalf("got dut.Poll(...) = %#v", pfds)
+ }
+
+ // Re-send the ACK to complete handshake and re-fill the accept-queue.
+ incompleteConn.Send(t, testbench.TCP{Flags: testbench.TCPFlags(header.TCPFlagAck)})
+ dut.PollOne(t, listenFd, unix.POLLIN, time.Second)
+
+ // Now initiate a new connection when the accept queue is full.
+ connectingConn := dut.Net.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
+ defer connectingConn.Close(t)
+ // Expect dut connection to drop the SYN and let the client stay in SYN_SENT state.
+ connectingConn.Send(t, testbench.TCP{Flags: testbench.TCPFlags(header.TCPFlagSyn)})
+ if got, err := connectingConn.ExpectData(t, &testbench.TCP{Flags: testbench.TCPFlags(header.TCPFlagSyn | header.TCPFlagAck)}, nil, time.Second); err == nil {
+ t.Fatalf("expected no SYN-ACK, but got %s", got)
+ }
+}
diff --git a/test/packetimpact/tests/tcp_rack_test.go b/test/packetimpact/tests/tcp_rack_test.go
index 0a5b0f12b..ff1431bbf 100644
--- a/test/packetimpact/tests/tcp_rack_test.go
+++ b/test/packetimpact/tests/tcp_rack_test.go
@@ -22,9 +22,9 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/tcpip/seqnum"
- "gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/test/packetimpact/testbench"
)
@@ -74,7 +74,7 @@ func getRTTAndRTO(t *testing.T, dut testbench.DUT, acceptFd int32) (rtt, rto tim
if got, want := len(infoBytes), linux.SizeOfTCPInfo; got != want {
t.Fatalf("expected %T, got %d bytes want %d bytes", info, got, want)
}
- binary.Unmarshal(infoBytes, usermem.ByteOrder, &info)
+ binary.Unmarshal(infoBytes, hostarch.ByteOrder, &info)
return time.Duration(info.RTT) * time.Microsecond, time.Duration(info.RTO) * time.Microsecond
}
@@ -407,7 +407,7 @@ func TestRACKWithLostRetransmission(t *testing.T) {
if got, want := len(infoBytes), linux.SizeOfTCPInfo; got != want {
t.Fatalf("expected %T, got %d bytes want %d bytes", info, got, want)
}
- binary.Unmarshal(infoBytes, usermem.ByteOrder, &info)
+ binary.Unmarshal(infoBytes, hostarch.ByteOrder, &info)
if info.CaState != linux.TCP_CA_Recovery {
t.Fatalf("expected connection to be in fast recovery, want: %v got: %v", linux.TCP_CA_Recovery, info.CaState)
}
diff --git a/test/packetimpact/tests/tcp_retransmits_test.go b/test/packetimpact/tests/tcp_retransmits_test.go
index 3dc8f63ab..1eafe20c3 100644
--- a/test/packetimpact/tests/tcp_retransmits_test.go
+++ b/test/packetimpact/tests/tcp_retransmits_test.go
@@ -23,8 +23,8 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/tcpip/header"
- "gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/test/packetimpact/testbench"
)
@@ -38,7 +38,7 @@ func getRTO(t *testing.T, dut testbench.DUT, acceptFd int32) (rto time.Duration)
if got, want := len(infoBytes), linux.SizeOfTCPInfo; got != want {
t.Fatalf("unexpected size for TCP_INFO, got %d bytes want %d bytes", got, want)
}
- binary.Unmarshal(infoBytes, usermem.ByteOrder, &info)
+ binary.Unmarshal(infoBytes, hostarch.ByteOrder, &info)
return time.Duration(info.RTO) * time.Microsecond
}
diff --git a/test/packetimpact/tests/tcp_syncookie_test.go b/test/packetimpact/tests/tcp_syncookie_test.go
new file mode 100644
index 000000000..1c21c62ff
--- /dev/null
+++ b/test/packetimpact/tests/tcp_syncookie_test.go
@@ -0,0 +1,70 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tcp_syncookie_test
+
+import (
+ "flag"
+ "testing"
+ "time"
+
+ "golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/test/packetimpact/testbench"
+)
+
+func init() {
+ testbench.Initialize(flag.CommandLine)
+}
+
+// TestTCPSynCookie tests if the DUT listener is replying back using syn cookies.
+// The test does not complete the handshake by not sending the ACK to SYNACK.
+// When syncookies are not used, this forces the listener to retransmit SYNACK.
+// And when syncookies are being used, there is no such retransmit.
+func TestTCPSynCookie(t *testing.T) {
+ dut := testbench.NewDUT(t)
+
+ // Listening endpoint accepts one more connection than the listen backlog.
+ _, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1 /*backlog*/)
+
+ var withoutSynCookieConn testbench.TCPIPv4
+ var withSynCookieConn testbench.TCPIPv4
+
+ // Create both connections up front; the SYNs themselves are sent by checkSynAck below.
+ for _, conn := range []*testbench.TCPIPv4{&withoutSynCookieConn, &withSynCookieConn} {
+ *conn = dut.Net.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
+ }
+ defer withoutSynCookieConn.Close(t)
+ defer withSynCookieConn.Close(t)
+
+ checkSynAck := func(t *testing.T, conn *testbench.TCPIPv4, expectRetransmit bool) {
+ // Expect dut connection to have transitioned to SYN-RCVD state.
+ conn.Send(t, testbench.TCP{Flags: testbench.TCPFlags(header.TCPFlagSyn)})
+ if _, err := conn.ExpectData(t, &testbench.TCP{Flags: testbench.TCPFlags(header.TCPFlagSyn | header.TCPFlagAck)}, nil, time.Second); err != nil {
+ t.Fatalf("expected SYN-ACK, but got %s", err)
+ }
+
+ // If the DUT listener is using syn cookies, it will not retransmit SYNACK
+ got, err := conn.ExpectData(t, &testbench.TCP{SeqNum: testbench.Uint32(uint32(*conn.RemoteSeqNum(t) - 1)), Flags: testbench.TCPFlags(header.TCPFlagSyn | header.TCPFlagAck)}, nil, 2*time.Second)
+ if expectRetransmit && err != nil {
+ t.Fatalf("expected retransmitted SYN-ACK, but got %s", err)
+ }
+ if !expectRetransmit && err == nil {
+ t.Fatalf("expected no retransmitted SYN-ACK, but got %s", got)
+ }
+ }
+
+ t.Run("without syncookies", func(t *testing.T) { checkSynAck(t, &withoutSynCookieConn, true /*expectRetransmit*/) })
+ t.Run("with syncookies", func(t *testing.T) { checkSynAck(t, &withSynCookieConn, false /*expectRetransmit*/) })
+}
diff --git a/test/perf/BUILD b/test/perf/BUILD
index ed899ac22..71982fc4d 100644
--- a/test/perf/BUILD
+++ b/test/perf/BUILD
@@ -35,7 +35,7 @@ syscall_test(
)
syscall_test(
- size = "enormous",
+ size = "large",
debug = False,
tags = ["nogotsan"],
test = "//test/perf/linux:getdents_benchmark",
@@ -48,7 +48,7 @@ syscall_test(
)
syscall_test(
- size = "enormous",
+ size = "large",
debug = False,
tags = ["nogotsan"],
test = "//test/perf/linux:gettid_benchmark",
@@ -106,7 +106,7 @@ syscall_test(
)
syscall_test(
- size = "enormous",
+ size = "large",
debug = False,
test = "//test/perf/linux:signal_benchmark",
)
@@ -124,9 +124,10 @@ syscall_test(
)
syscall_test(
- size = "enormous",
+ size = "large",
add_overlay = True,
debug = False,
+ tags = ["nogotsan"],
test = "//test/perf/linux:unlink_benchmark",
)
diff --git a/test/perf/linux/getpid_benchmark.cc b/test/perf/linux/getpid_benchmark.cc
index db74cb264..047a034bd 100644
--- a/test/perf/linux/getpid_benchmark.cc
+++ b/test/perf/linux/getpid_benchmark.cc
@@ -31,6 +31,24 @@ void BM_Getpid(benchmark::State& state) {
BENCHMARK(BM_Getpid);
+#ifdef __x86_64__
+
+#define SYSNO_STR1(x) #x
+#define SYSNO_STR(x) SYSNO_STR1(x)
+
+// BM_GetpidOpt uses the most common pattern of calling system calls:
+// mov $SYS_XXX, %eax; syscall.
+void BM_GetpidOpt(benchmark::State& state) {
+ for (auto s : state) {
+ __asm__("movl $" SYSNO_STR(SYS_getpid) ", %%eax\n"
+ "syscall\n"
+ : : : "rax", "rcx", "r11");
+ }
+}
+
+BENCHMARK(BM_GetpidOpt);
+#endif // __x86_64__
+
} // namespace
} // namespace testing
diff --git a/test/runtimes/defs.bzl b/test/runtimes/defs.bzl
index 702522d86..2550b61a3 100644
--- a/test/runtimes/defs.bzl
+++ b/test/runtimes/defs.bzl
@@ -75,7 +75,6 @@ def runtime_test(name, **kwargs):
"local",
"manual",
],
- size = "enormous",
**kwargs
)
diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD
index ef299799e..affcae8fd 100644
--- a/test/syscalls/BUILD
+++ b/test/syscalls/BUILD
@@ -244,6 +244,10 @@ syscall_test(
)
syscall_test(
+ test = "//test/syscalls/linux:verity_ioctl_test",
+)
+
+syscall_test(
test = "//test/syscalls/linux:iptables_test",
)
@@ -318,6 +322,10 @@ syscall_test(
)
syscall_test(
+ test = "//test/syscalls/linux:verity_mount_test",
+)
+
+syscall_test(
size = "medium",
test = "//test/syscalls/linux:mremap_test",
)
@@ -772,8 +780,7 @@ syscall_test(
)
syscall_test(
- # NOTE(b/116636318): Large sendmsg may stall a long time.
- size = "enormous",
+ flaky = 1, # NOTE(b/116636318): Large sendmsg may stall a long time.
shard_count = more_shards,
test = "//test/syscalls/linux:socket_unix_dgram_local_test",
)
@@ -791,8 +798,7 @@ syscall_test(
)
syscall_test(
- # NOTE(b/116636318): Large sendmsg may stall a long time.
- size = "enormous",
+ flaky = 1, # NOTE(b/116636318): Large sendmsg may stall a long time.
shard_count = more_shards,
test = "//test/syscalls/linux:socket_unix_seqpacket_local_test",
)
@@ -995,3 +1001,7 @@ syscall_test(
syscall_test(
test = "//test/syscalls/linux:processes_test",
)
+
+syscall_test(
+ test = "//test/syscalls/linux:cgroup_test",
+)
diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD
index 043ada583..bc2c7c0e3 100644
--- a/test/syscalls/linux/BUILD
+++ b/test/syscalls/linux/BUILD
@@ -1014,6 +1014,22 @@ cc_binary(
],
)
+cc_binary(
+ name = "verity_ioctl_test",
+ testonly = 1,
+ srcs = ["verity_ioctl.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:capability_util",
+ gtest,
+ "//test/util:fs_util",
+ "//test/util:mount_util",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ ],
+)
+
cc_library(
name = "iptables_types",
testonly = 1,
@@ -1304,6 +1320,20 @@ cc_binary(
)
cc_binary(
+ name = "verity_mount_test",
+ testonly = 1,
+ srcs = ["verity_mount.cc"],
+ linkstatic = 1,
+ deps = [
+ gtest,
+ "//test/util:capability_util",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ ],
+)
+
+cc_binary(
name = "mremap_test",
testonly = 1,
srcs = ["mremap.cc"],
@@ -4205,3 +4235,24 @@ cc_binary(
"//test/util:test_util",
],
)
+
+cc_binary(
+ name = "cgroup_test",
+ testonly = 1,
+ srcs = ["cgroup.cc"],
+ linkstatic = 1,
+ deps = [
+ "//test/util:capability_util",
+ "//test/util:cgroup_util",
+ "//test/util:file_descriptor",
+ "//test/util:fs_util",
+ "@com_google_absl//absl/strings",
+ gtest,
+ "//test/util:posix_error",
+ "//test/util:temp_path",
+ "//test/util:test_main",
+ "//test/util:test_util",
+ "@com_google_absl//absl/container:flat_hash_map",
+ "@com_google_absl//absl/container:flat_hash_set",
+ ],
+)
diff --git a/test/syscalls/linux/cgroup.cc b/test/syscalls/linux/cgroup.cc
new file mode 100644
index 000000000..a1006a978
--- /dev/null
+++ b/test/syscalls/linux/cgroup.cc
@@ -0,0 +1,421 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// All tests in this file rely on being able to mount and unmount cgroupfs,
+// which isn't expected to work, or be safe on a general linux system.
+
+#include <sys/mount.h>
+#include <unistd.h>
+
+#include "gtest/gtest.h"
+#include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
+#include "absl/strings/str_split.h"
+#include "test/util/capability_util.h"
+#include "test/util/cgroup_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+namespace {
+
+using ::testing::_;
+using ::testing::Ge;
+using ::testing::Gt;
+
+std::vector<std::string> known_controllers = {"cpu", "cpuset", "cpuacct",
+ "memory"};
+
+bool CgroupsAvailable() {
+ return IsRunningOnGvisor() && !IsRunningWithVFS1() &&
+ TEST_CHECK_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN));
+}
+
+TEST(Cgroup, MountSucceeds) {
+ SKIP_IF(!CgroupsAvailable());
+
+ Mounter m(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()));
+ Cgroup c = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs(""));
+ EXPECT_NO_ERRNO(c.ContainsCallingProcess());
+}
+
+TEST(Cgroup, SeparateMounts) {
+ SKIP_IF(!CgroupsAvailable());
+
+ Mounter m(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()));
+
+ for (const auto& ctl : known_controllers) {
+ Cgroup c = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs(ctl));
+ EXPECT_NO_ERRNO(c.ContainsCallingProcess());
+ }
+}
+
+TEST(Cgroup, AllControllersImplicit) {
+ SKIP_IF(!CgroupsAvailable());
+
+ Mounter m(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()));
+ Cgroup c = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs(""));
+
+ absl::flat_hash_map<std::string, CgroupsEntry> cgroups_entries =
+ ASSERT_NO_ERRNO_AND_VALUE(ProcCgroupsEntries());
+ for (const auto& ctl : known_controllers) {
+ EXPECT_TRUE(cgroups_entries.contains(ctl))
+ << absl::StreamFormat("ctl=%s", ctl);
+ }
+ EXPECT_EQ(cgroups_entries.size(), known_controllers.size());
+}
+
+TEST(Cgroup, AllControllersExplicit) {
+ SKIP_IF(!CgroupsAvailable());
+
+ Mounter m(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()));
+ Cgroup c = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs("all"));
+
+ absl::flat_hash_map<std::string, CgroupsEntry> cgroups_entries =
+ ASSERT_NO_ERRNO_AND_VALUE(ProcCgroupsEntries());
+ for (const auto& ctl : known_controllers) {
+ EXPECT_TRUE(cgroups_entries.contains(ctl))
+ << absl::StreamFormat("ctl=%s", ctl);
+ }
+ EXPECT_EQ(cgroups_entries.size(), known_controllers.size());
+}
+
+TEST(Cgroup, ProcsAndTasks) {
+ SKIP_IF(!CgroupsAvailable());
+
+ Mounter m(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()));
+ Cgroup c = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs(""));
+ absl::flat_hash_set<pid_t> pids = ASSERT_NO_ERRNO_AND_VALUE(c.Procs());
+ absl::flat_hash_set<pid_t> tids = ASSERT_NO_ERRNO_AND_VALUE(c.Tasks());
+
+ EXPECT_GE(tids.size(), pids.size()) << "Found more processes than threads";
+
+ // Pids should be a subset of tids.
+ for (auto it = pids.begin(); it != pids.end(); ++it) {
+ EXPECT_TRUE(tids.contains(*it))
+ << absl::StreamFormat("Have pid %d, but no such tid", *it);
+ }
+}
+
+TEST(Cgroup, ControllersMustBeInUniqueHierarchy) {
+ SKIP_IF(!CgroupsAvailable());
+
+ Mounter m(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()));
+ // Hierarchy #1: all controllers.
+ Cgroup all = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs(""));
+ // Hierarchy #2: memory.
+ //
+ // This should conflict since memory is already in hierarchy #1, and the two
+ // hierarchies have different sets of controllers, so this mount can't be a
+ // view into hierarchy #1.
+ EXPECT_THAT(m.MountCgroupfs("memory"), PosixErrorIs(EBUSY, _))
+ << "Memory controller mounted on two hierarchies";
+ EXPECT_THAT(m.MountCgroupfs("cpu"), PosixErrorIs(EBUSY, _))
+ << "CPU controller mounted on two hierarchies";
+}
+
+TEST(Cgroup, UnmountFreesControllers) {
+ SKIP_IF(!CgroupsAvailable());
+ Mounter m(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()));
+ Cgroup all = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs(""));
+ // All controllers are now attached to all's hierarchy. Attempting new mount
+ // with any individual controller should fail.
+ EXPECT_THAT(m.MountCgroupfs("memory"), PosixErrorIs(EBUSY, _))
+ << "Memory controller mounted on two hierarchies";
+
+ // Unmount the "all" hierarchy. This should enable any controller to be
+ // mounted on a new hierarchy again.
+ ASSERT_NO_ERRNO(m.Unmount(all));
+ EXPECT_NO_ERRNO(m.MountCgroupfs("memory"));
+ EXPECT_NO_ERRNO(m.MountCgroupfs("cpu"));
+}
+
+TEST(Cgroup, OnlyContainsControllerSpecificFiles) {
+ SKIP_IF(!CgroupsAvailable());
+ Mounter m(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()));
+ Cgroup mem = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs("memory"));
+ EXPECT_THAT(Exists(mem.Relpath("memory.usage_in_bytes")),
+ IsPosixErrorOkAndHolds(true));
+ // CPU files shouldn't exist in memory cgroups.
+ EXPECT_THAT(Exists(mem.Relpath("cpu.cfs_period_us")),
+ IsPosixErrorOkAndHolds(false));
+ EXPECT_THAT(Exists(mem.Relpath("cpu.cfs_quota_us")),
+ IsPosixErrorOkAndHolds(false));
+ EXPECT_THAT(Exists(mem.Relpath("cpu.shares")), IsPosixErrorOkAndHolds(false));
+
+ Cgroup cpu = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs("cpu"));
+ EXPECT_THAT(Exists(cpu.Relpath("cpu.cfs_period_us")),
+ IsPosixErrorOkAndHolds(true));
+ EXPECT_THAT(Exists(cpu.Relpath("cpu.cfs_quota_us")),
+ IsPosixErrorOkAndHolds(true));
+ EXPECT_THAT(Exists(cpu.Relpath("cpu.shares")), IsPosixErrorOkAndHolds(true));
+ // Memory files shouldn't exist in cpu cgroups.
+ EXPECT_THAT(Exists(cpu.Relpath("memory.usage_in_bytes")),
+ IsPosixErrorOkAndHolds(false));
+}
+
+TEST(Cgroup, InvalidController) {
+ SKIP_IF(!CgroupsAvailable());
+
+ TempPath mountpoint = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ std::string mopts = "this-controller-is-invalid";
+ EXPECT_THAT(
+ mount("none", mountpoint.path().c_str(), "cgroup", 0, mopts.c_str()),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(Cgroup, MoptAllMustBeExclusive) {
+ SKIP_IF(!CgroupsAvailable());
+
+ TempPath mountpoint = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ std::string mopts = "all,cpu";
+ EXPECT_THAT(
+ mount("none", mountpoint.path().c_str(), "cgroup", 0, mopts.c_str()),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(MemoryCgroup, MemoryUsageInBytes) {
+ SKIP_IF(!CgroupsAvailable());
+
+ Mounter m(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()));
+ Cgroup c = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs("memory"));
+ EXPECT_THAT(c.ReadIntegerControlFile("memory.usage_in_bytes"),
+ IsPosixErrorOkAndHolds(Gt(0)));
+}
+
+TEST(CPUCgroup, ControlFilesHaveDefaultValues) {
+ SKIP_IF(!CgroupsAvailable());
+
+ Mounter m(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()));
+ Cgroup c = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs("cpu"));
+ EXPECT_THAT(c.ReadIntegerControlFile("cpu.cfs_quota_us"),
+ IsPosixErrorOkAndHolds(-1));
+ EXPECT_THAT(c.ReadIntegerControlFile("cpu.cfs_period_us"),
+ IsPosixErrorOkAndHolds(100000));
+ EXPECT_THAT(c.ReadIntegerControlFile("cpu.shares"),
+ IsPosixErrorOkAndHolds(1024));
+}
+
+TEST(CPUAcctCgroup, CPUAcctUsage) {
+ SKIP_IF(!CgroupsAvailable());
+
+ Mounter m(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()));
+ Cgroup c = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs("cpuacct"));
+
+ const int64_t usage =
+ ASSERT_NO_ERRNO_AND_VALUE(c.ReadIntegerControlFile("cpuacct.usage"));
+ const int64_t usage_user =
+ ASSERT_NO_ERRNO_AND_VALUE(c.ReadIntegerControlFile("cpuacct.usage_user"));
+ const int64_t usage_sys =
+ ASSERT_NO_ERRNO_AND_VALUE(c.ReadIntegerControlFile("cpuacct.usage_sys"));
+
+ EXPECT_GE(usage, 0);
+ EXPECT_GE(usage_user, 0);
+ EXPECT_GE(usage_sys, 0);
+
+ EXPECT_GE(usage_user + usage_sys, usage);
+}
+
+TEST(CPUAcctCgroup, CPUAcctStat) {
+ SKIP_IF(!CgroupsAvailable());
+
+ Mounter m(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()));
+ Cgroup c = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs("cpuacct"));
+
+ std::string stat =
+ ASSERT_NO_ERRNO_AND_VALUE(c.ReadControlFile("cpuacct.stat"));
+
+ // We're expecting the contents of "cpuacct.stat" to look similar to this:
+ //
+ // user 377986
+ // system 220662
+
+ std::vector<absl::string_view> lines =
+ absl::StrSplit(stat, '\n', absl::SkipEmpty());
+ ASSERT_EQ(lines.size(), 2);
+
+ std::vector<absl::string_view> user_tokens =
+ StrSplit(lines[0], absl::ByChar(' '));
+ EXPECT_EQ(user_tokens[0], "user");
+ EXPECT_THAT(Atoi<int64_t>(user_tokens[1]), IsPosixErrorOkAndHolds(Ge(0)));
+
+ std::vector<absl::string_view> sys_tokens =
+ StrSplit(lines[1], absl::ByChar(' '));
+ EXPECT_EQ(sys_tokens[0], "system");
+ EXPECT_THAT(Atoi<int64_t>(sys_tokens[1]), IsPosixErrorOkAndHolds(Ge(0)));
+}
+
+TEST(ProcCgroups, Empty) {
+ SKIP_IF(!CgroupsAvailable());
+
+ absl::flat_hash_map<std::string, CgroupsEntry> entries =
+ ASSERT_NO_ERRNO_AND_VALUE(ProcCgroupsEntries());
+ // No cgroups mounted yet, we should have no entries.
+ EXPECT_TRUE(entries.empty());
+}
+
+TEST(ProcCgroups, ProcCgroupsEntries) {
+ SKIP_IF(!CgroupsAvailable());
+
+ Mounter m(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()));
+
+ Cgroup mem = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs("memory"));
+ absl::flat_hash_map<std::string, CgroupsEntry> entries =
+ ASSERT_NO_ERRNO_AND_VALUE(ProcCgroupsEntries());
+ EXPECT_EQ(entries.size(), 1);
+ ASSERT_TRUE(entries.contains("memory"));
+ CgroupsEntry mem_e = entries["memory"];
+ EXPECT_EQ(mem_e.subsys_name, "memory");
+ EXPECT_GE(mem_e.hierarchy, 1);
+ // Expect a single root cgroup.
+ EXPECT_EQ(mem_e.num_cgroups, 1);
+ // Cgroups are currently always enabled when mounted.
+ EXPECT_TRUE(mem_e.enabled);
+
+ // Add a second cgroup, and check for new entry.
+
+ Cgroup cpu = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs("cpu"));
+ entries = ASSERT_NO_ERRNO_AND_VALUE(ProcCgroupsEntries());
+ EXPECT_EQ(entries.size(), 2);
+ EXPECT_TRUE(entries.contains("memory")); // Still have memory entry.
+ ASSERT_TRUE(entries.contains("cpu"));
+ CgroupsEntry cpu_e = entries["cpu"];
+ EXPECT_EQ(cpu_e.subsys_name, "cpu");
+ EXPECT_GE(cpu_e.hierarchy, 1);
+ EXPECT_EQ(cpu_e.num_cgroups, 1);
+ EXPECT_TRUE(cpu_e.enabled);
+
+ // Separate hierarchies, since controllers were mounted separately.
+ EXPECT_NE(mem_e.hierarchy, cpu_e.hierarchy);
+}
+
+TEST(ProcCgroups, UnmountRemovesEntries) {
+ SKIP_IF(!CgroupsAvailable());
+
+ Mounter m(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()));
+ Cgroup cg = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs("cpu,memory"));
+ absl::flat_hash_map<std::string, CgroupsEntry> entries =
+ ASSERT_NO_ERRNO_AND_VALUE(ProcCgroupsEntries());
+ EXPECT_EQ(entries.size(), 2);
+
+ ASSERT_NO_ERRNO(m.Unmount(cg));
+
+ entries = ASSERT_NO_ERRNO_AND_VALUE(ProcCgroupsEntries());
+ EXPECT_TRUE(entries.empty());
+}
+
+TEST(ProcPIDCgroup, Empty) {
+ SKIP_IF(!CgroupsAvailable());
+
+ absl::flat_hash_map<std::string, PIDCgroupEntry> entries =
+ ASSERT_NO_ERRNO_AND_VALUE(ProcPIDCgroupEntries(getpid()));
+ EXPECT_TRUE(entries.empty());
+}
+
+TEST(ProcPIDCgroup, Entries) {
+ SKIP_IF(!CgroupsAvailable());
+
+ Mounter m(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()));
+ Cgroup c = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs("memory"));
+
+ absl::flat_hash_map<std::string, PIDCgroupEntry> entries =
+ ASSERT_NO_ERRNO_AND_VALUE(ProcPIDCgroupEntries(getpid()));
+ EXPECT_EQ(entries.size(), 1);
+ PIDCgroupEntry mem_e = entries["memory"];
+ EXPECT_GE(mem_e.hierarchy, 1);
+ EXPECT_EQ(mem_e.controllers, "memory");
+ EXPECT_EQ(mem_e.path, "/");
+
+ Cgroup c1 = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs("cpu"));
+ entries = ASSERT_NO_ERRNO_AND_VALUE(ProcPIDCgroupEntries(getpid()));
+ EXPECT_EQ(entries.size(), 2);
+ EXPECT_TRUE(entries.contains("memory")); // Still have memory entry.
+ PIDCgroupEntry cpu_e = entries["cpu"];
+ EXPECT_GE(cpu_e.hierarchy, 1);
+ EXPECT_EQ(cpu_e.controllers, "cpu");
+ EXPECT_EQ(cpu_e.path, "/");
+
+ // Separate hierarchies, since controllers were mounted separately.
+ EXPECT_NE(mem_e.hierarchy, cpu_e.hierarchy);
+}
+
+TEST(ProcPIDCgroup, UnmountRemovesEntries) {
+ SKIP_IF(!CgroupsAvailable());
+
+ Mounter m(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()));
+ Cgroup all = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs(""));
+
+ absl::flat_hash_map<std::string, PIDCgroupEntry> entries =
+ ASSERT_NO_ERRNO_AND_VALUE(ProcPIDCgroupEntries(getpid()));
+ EXPECT_GT(entries.size(), 0);
+
+ ASSERT_NO_ERRNO(m.Unmount(all));
+
+ entries = ASSERT_NO_ERRNO_AND_VALUE(ProcPIDCgroupEntries(getpid()));
+ EXPECT_TRUE(entries.empty());
+}
+
+TEST(ProcCgroup, PIDCgroupMatchesCgroups) {
+ SKIP_IF(!CgroupsAvailable());
+
+ Mounter m(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()));
+ Cgroup c = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs("memory"));
+ Cgroup c1 = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs("cpu"));
+
+ absl::flat_hash_map<std::string, CgroupsEntry> cgroups_entries =
+ ASSERT_NO_ERRNO_AND_VALUE(ProcCgroupsEntries());
+ absl::flat_hash_map<std::string, PIDCgroupEntry> pid_entries =
+ ASSERT_NO_ERRNO_AND_VALUE(ProcPIDCgroupEntries(getpid()));
+
+ CgroupsEntry cgroup_mem = cgroups_entries["memory"];
+ PIDCgroupEntry pid_mem = pid_entries["memory"];
+
+ EXPECT_EQ(cgroup_mem.hierarchy, pid_mem.hierarchy);
+
+ CgroupsEntry cgroup_cpu = cgroups_entries["cpu"];
+ PIDCgroupEntry pid_cpu = pid_entries["cpu"];
+
+ EXPECT_EQ(cgroup_cpu.hierarchy, pid_cpu.hierarchy);
+ EXPECT_NE(cgroup_mem.hierarchy, cgroup_cpu.hierarchy);
+ EXPECT_NE(pid_mem.hierarchy, pid_cpu.hierarchy);
+}
+
+TEST(ProcCgroup, MultiControllerHierarchy) {
+ SKIP_IF(!CgroupsAvailable());
+
+ Mounter m(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()));
+ Cgroup c = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs("memory,cpu"));
+
+ absl::flat_hash_map<std::string, CgroupsEntry> cgroups_entries =
+ ASSERT_NO_ERRNO_AND_VALUE(ProcCgroupsEntries());
+
+ CgroupsEntry mem_e = cgroups_entries["memory"];
+ CgroupsEntry cpu_e = cgroups_entries["cpu"];
+
+ // Both controllers should have the same hierarchy ID.
+ EXPECT_EQ(mem_e.hierarchy, cpu_e.hierarchy);
+
+ absl::flat_hash_map<std::string, PIDCgroupEntry> pid_entries =
+ ASSERT_NO_ERRNO_AND_VALUE(ProcPIDCgroupEntries(getpid()));
+
+ // Expecting an entry listing both controllers, that matches the previous
+ // hierarchy ID. Note that the controllers are listed in alphabetical order.
+ PIDCgroupEntry pid_e = pid_entries["cpu,memory"];
+ EXPECT_EQ(pid_e.hierarchy, mem_e.hierarchy);
+}
+
+} // namespace
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/semaphore.cc b/test/syscalls/linux/semaphore.cc
index 28f51a3bf..8c5732147 100644
--- a/test/syscalls/linux/semaphore.cc
+++ b/test/syscalls/linux/semaphore.cc
@@ -234,14 +234,6 @@ TEST(SemaphoreTest, SemTimedOpBlock) {
AutoSem sem(semget(IPC_PRIVATE, 1, 0600 | IPC_CREAT));
ASSERT_THAT(sem.get(), SyscallSucceeds());
- ScopedThread th([&sem] {
- absl::SleepFor(absl::Milliseconds(100));
-
- struct sembuf buf = {};
- buf.sem_op = 1;
- ASSERT_THAT(RetryEINTR(semop)(sem.get(), &buf, 1), SyscallSucceeds());
- });
-
struct sembuf buf = {};
buf.sem_op = -1;
struct timespec timeout = {};
diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc
index 597b5bcb1..d391363fb 100644
--- a/test/syscalls/linux/socket_inet_loopback.cc
+++ b/test/syscalls/linux/socket_inet_loopback.cc
@@ -489,13 +489,6 @@ void TestListenWhileConnect(const TestParam& param,
TestAddress const& listener = param.listener;
TestAddress const& connector = param.connector;
- constexpr int kBacklog = 2;
- // Linux completes one more connection than the listen backlog argument.
- // To ensure that there is at least one client connection that stays in
- // connecting state, keep 2 more client connections than the listen backlog.
- // gVisor differs in this behavior though, gvisor.dev/issue/3153.
- constexpr int kClients = kBacklog + 2;
-
// Create the listening socket.
FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE(
Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP));
@@ -503,6 +496,13 @@ void TestListenWhileConnect(const TestParam& param,
ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr),
listener.addr_len),
SyscallSucceeds());
+ // This test is only interested in deterministically getting a socket in
+ // connecting state. For that, we use a listen backlog of zero which would
+ // mean there is exactly one connection that gets established and is enqueued
+ // to the accept queue. We poll on the listener to ensure that it is enqueued.
+ // After that the subsequent client connect will stay in connecting state as
+ // the accept queue is full.
+ constexpr int kBacklog = 0;
ASSERT_THAT(listen(listen_fd.get(), kBacklog), SyscallSucceeds());
// Get the port bound by the listening socket.
@@ -515,42 +515,49 @@ void TestListenWhileConnect(const TestParam& param,
sockaddr_storage conn_addr = connector.addr;
ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port));
- std::vector<FileDescriptor> clients;
- for (int i = 0; i < kClients; i++) {
- FileDescriptor client = ASSERT_NO_ERRNO_AND_VALUE(
- Socket(connector.family(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP));
- int ret = connect(client.get(), reinterpret_cast<sockaddr*>(&conn_addr),
- connector.addr_len);
- if (ret != 0) {
- EXPECT_THAT(ret, SyscallFailsWithErrno(EINPROGRESS));
- clients.push_back(std::move(client));
- }
+ FileDescriptor established_client = ASSERT_NO_ERRNO_AND_VALUE(
+ Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP));
+ ASSERT_THAT(
+ connect(established_client.get(), reinterpret_cast<sockaddr*>(&conn_addr),
+ connector.addr_len),
+ SyscallSucceeds());
+
+ // Ensure that the accept queue has the completed connection.
+ constexpr int kTimeout = 10000;
+ pollfd pfd = {
+ .fd = listen_fd.get(),
+ .events = POLLIN,
+ };
+ ASSERT_THAT(poll(&pfd, 1, kTimeout), SyscallSucceedsWithValue(1));
+ ASSERT_EQ(pfd.revents, POLLIN);
+
+ FileDescriptor connecting_client = ASSERT_NO_ERRNO_AND_VALUE(
+ Socket(connector.family(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP));
+ // Keep the last client in connecting state.
+ int ret =
+ connect(connecting_client.get(), reinterpret_cast<sockaddr*>(&conn_addr),
+ connector.addr_len);
+ if (ret != 0) {
+ EXPECT_THAT(ret, SyscallFailsWithErrno(EINPROGRESS));
}
stopListen(listen_fd);
- for (auto& client : clients) {
- constexpr int kTimeout = 10000;
+ std::array<std::pair<int, int>, 2> sockets = {
+ std::make_pair(established_client.get(), ECONNRESET),
+ std::make_pair(connecting_client.get(), ECONNREFUSED),
+ };
+ for (size_t i = 0; i < sockets.size(); i++) {
+ SCOPED_TRACE(absl::StrCat("i=", i));
+ auto [fd, expected_errno] = sockets[i];
pollfd pfd = {
- .fd = client.get(),
- .events = POLLIN,
+ .fd = fd,
};
- // When the listening socket is closed, then we expect the remote to reset
- // the connection.
- ASSERT_THAT(poll(&pfd, 1, kTimeout), SyscallSucceedsWithValue(1));
- ASSERT_EQ(pfd.revents, POLLIN | POLLHUP | POLLERR);
+ // When the listening socket is closed, the peer would reset the connection.
+ EXPECT_THAT(poll(&pfd, 1, kTimeout), SyscallSucceedsWithValue(1));
+ EXPECT_EQ(pfd.revents, POLLHUP | POLLERR);
char c;
- // Subsequent read can fail with:
- // ECONNRESET: If the client connection was established and was reset by the
- // remote.
- // ECONNREFUSED: If the client connection failed to be established.
- ASSERT_THAT(read(client.get(), &c, sizeof(c)),
- AnyOf(SyscallFailsWithErrno(ECONNRESET),
- SyscallFailsWithErrno(ECONNREFUSED)));
- // The last client connection would be in connecting (SYN_SENT) state.
- if (client.get() == clients[kClients - 1].get()) {
- ASSERT_EQ(errno, ECONNREFUSED) << strerror(errno);
- }
+ EXPECT_THAT(read(fd, &c, sizeof(c)), SyscallFailsWithErrno(expected_errno));
}
}
@@ -570,7 +577,59 @@ TEST_P(SocketInetLoopbackTest, TCPListenShutdownWhileConnect) {
// random save as established connections which can't be delivered to the accept
// queue because the queue is full are not correctly delivered after restore
// causing the last accept to timeout on the restore.
-TEST_P(SocketInetLoopbackTest, TCPbacklog_NoRandomSave) {
+TEST_P(SocketInetLoopbackTest, TCPAcceptBacklogSizes_NoRandomSave) {
+ auto const& param = GetParam();
+
+ TestAddress const& listener = param.listener;
+ TestAddress const& connector = param.connector;
+
+ // Create the listening socket.
+ const FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE(
+ Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP));
+ sockaddr_storage listen_addr = listener.addr;
+ ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr),
+ listener.addr_len),
+ SyscallSucceeds());
+ // Get the port bound by the listening socket.
+ socklen_t addrlen = listener.addr_len;
+ ASSERT_THAT(getsockname(listen_fd.get(),
+ reinterpret_cast<sockaddr*>(&listen_addr), &addrlen),
+ SyscallSucceeds());
+ uint16_t const port =
+ ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr));
+ std::array<int, 3> backlogs = {-1, 0, 1};
+ for (auto& backlog : backlogs) {
+ ASSERT_THAT(listen(listen_fd.get(), backlog), SyscallSucceeds());
+
+ int expected_accepts;
+ if (backlog < 0) {
+ expected_accepts = 1024;
+ } else {
+ expected_accepts = backlog + 1;
+ }
+ for (int i = 0; i < expected_accepts; i++) {
+ SCOPED_TRACE(absl::StrCat("i=", i));
+ // Connect to the listening socket.
+ const FileDescriptor conn_fd = ASSERT_NO_ERRNO_AND_VALUE(
+ Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP));
+ sockaddr_storage conn_addr = connector.addr;
+ ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port));
+ ASSERT_THAT(
+ RetryEINTR(connect)(conn_fd.get(),
+ reinterpret_cast<struct sockaddr*>(&conn_addr),
+ connector.addr_len),
+ SyscallSucceeds());
+ const FileDescriptor accepted =
+ ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr));
+ }
+ }
+}
+
+// TODO(b/157236388): Remove _NoRandomSave once bug is fixed. Test fails w/
+// random save as established connections which can't be delivered to the accept
+// queue because the queue is full are not correctly delivered after restore
+// causing the last accept to timeout on the restore.
+TEST_P(SocketInetLoopbackTest, TCPBacklog_NoRandomSave) {
auto const& param = GetParam();
TestAddress const& listener = param.listener;
@@ -595,6 +654,7 @@ TEST_P(SocketInetLoopbackTest, TCPbacklog_NoRandomSave) {
ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr));
int i = 0;
while (1) {
+ SCOPED_TRACE(absl::StrCat("i=", i));
int ret;
// Connect to the listening socket.
@@ -620,103 +680,133 @@ TEST_P(SocketInetLoopbackTest, TCPbacklog_NoRandomSave) {
i++;
}
+ int client_conns = i;
+ int accepted_conns = 0;
for (; i != 0; i--) {
- // Accept the connection.
- //
- // We have to assign a name to the accepted socket, as unamed temporary
- // objects are destructed upon full evaluation of the expression it is in,
- // potentially causing the connecting socket to fail to shutdown properly.
- auto accepted =
- ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr));
+ SCOPED_TRACE(absl::StrCat("i=", i));
+ pollfd pfd = {
+ .fd = listen_fd.get(),
+ .events = POLLIN,
+ };
+ // Look for incoming connections to accept. The last connect request could
+ // be established from the client side, but the ACK of the handshake could
+ // be dropped by the listener if the accept queue was filled up by the
+ // previous connect.
+ int ret;
+ ASSERT_THAT(ret = poll(&pfd, 1, 3000), SyscallSucceeds());
+ if (ret == 0) break;
+ if (pfd.revents == POLLIN) {
+ // Accept the connection.
+ //
+ // We have to assign a name to the accepted socket, as unnamed temporary
+ // objects are destructed upon full evaluation of the expression it is in,
+ // potentially causing the connecting socket to fail to shutdown properly.
+ auto accepted =
+ ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr));
+ accepted_conns++;
+ }
}
+ // We should accept at least listen backlog + 1 connections. As the stack is
+ // enqueuing established connections to the accept queue, newer SYNs could
+ // still be replied to, causing those client connections to be accepted as
+ // we start dequeuing the queue.
+ ASSERT_GE(accepted_conns, kBacklogSize + 1);
+ ASSERT_GE(client_conns, accepted_conns);
}
-// Test if the stack completes atmost listen backlog number of client
-// connections. It exercises the path of the stack that enqueues completed
-// connections to accept queue vs new incoming SYNs.
-TEST_P(SocketInetLoopbackTest, TCPConnectBacklog_NoRandomSave) {
- const auto& param = GetParam();
- const TestAddress& listener = param.listener;
- const TestAddress& connector = param.connector;
+// TODO(b/157236388): Remove _NoRandomSave once bug is fixed. Test fails w/
+// random save as established connections which can't be delivered to the accept
+// queue because the queue is full are not correctly delivered after restore
+// causing the last accept to timeout on the restore.
+TEST_P(SocketInetLoopbackTest, TCPBacklogAcceptAll_NoRandomSave) {
+ auto const& param = GetParam();
+ TestAddress const& listener = param.listener;
+ TestAddress const& connector = param.connector;
+ // Create the listening socket.
+ FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE(
+ Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP));
+ sockaddr_storage listen_addr = listener.addr;
+ ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr),
+ listener.addr_len),
+ SyscallSucceeds());
constexpr int kBacklog = 1;
- // Keep the number of client connections more than the listen backlog.
- // Linux completes one more connection than the listen backlog argument.
- // gVisor differs in this behavior though, gvisor.dev/issue/3153.
- int kClients = kBacklog + 2;
- if (IsRunningOnGvisor()) {
- kClients--;
- }
+ ASSERT_THAT(listen(listen_fd.get(), kBacklog), SyscallSucceeds());
- // Run the following test for few iterations to test race between accept queue
- // getting filled with incoming SYNs.
- for (int num = 0; num < 10; num++) {
- FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE(
- Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP));
- sockaddr_storage listen_addr = listener.addr;
- ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr),
- listener.addr_len),
- SyscallSucceeds());
- ASSERT_THAT(listen(listen_fd.get(), kBacklog), SyscallSucceeds());
+ // Get the port bound by the listening socket.
+ socklen_t addrlen = listener.addr_len;
+ ASSERT_THAT(getsockname(listen_fd.get(),
+ reinterpret_cast<sockaddr*>(&listen_addr), &addrlen),
+ SyscallSucceeds());
+ uint16_t const port =
+ ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr));
- socklen_t addrlen = listener.addr_len;
- ASSERT_THAT(
- getsockname(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr),
- &addrlen),
- SyscallSucceeds());
- uint16_t const port =
- ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr));
- sockaddr_storage conn_addr = connector.addr;
- ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port));
+ sockaddr_storage conn_addr = connector.addr;
+ ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port));
- std::vector<FileDescriptor> clients;
- // Issue multiple non-blocking client connects.
- for (int i = 0; i < kClients; i++) {
- FileDescriptor client = ASSERT_NO_ERRNO_AND_VALUE(
- Socket(connector.family(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP));
- int ret = connect(client.get(), reinterpret_cast<sockaddr*>(&conn_addr),
- connector.addr_len);
- if (ret != 0) {
- EXPECT_THAT(ret, SyscallFailsWithErrno(EINPROGRESS));
- }
- clients.push_back(std::move(client));
+ // Fill up the accept queue and trigger more client connections which would be
+ // waiting to be accepted.
+ std::array<FileDescriptor, kBacklog + 1> established_clients;
+ for (auto& fd : established_clients) {
+ fd = ASSERT_NO_ERRNO_AND_VALUE(
+ Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP));
+ ASSERT_THAT(connect(fd.get(), reinterpret_cast<sockaddr*>(&conn_addr),
+ connector.addr_len),
+ SyscallSucceeds());
+ }
+ std::array<FileDescriptor, kBacklog> waiting_clients;
+ for (auto& fd : waiting_clients) {
+ fd = ASSERT_NO_ERRNO_AND_VALUE(
+ Socket(connector.family(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP));
+ int ret = connect(fd.get(), reinterpret_cast<sockaddr*>(&conn_addr),
+ connector.addr_len);
+ if (ret != 0) {
+ EXPECT_THAT(ret, SyscallFailsWithErrno(EINPROGRESS));
}
+ }
- // Now that client connects are issued, wait for the accept queue to get
- // filled and ensure no new client connection is completed.
- for (int i = 0; i < kClients; i++) {
- pollfd pfd = {
- .fd = clients[i].get(),
- .events = POLLOUT,
- };
- if (i < kClients - 1) {
- // Poll for client side connection completions with a large timeout.
- // We cannot poll on the listener side without calling accept as poll
- // stays level triggered with non-zero accept queue length.
- //
- // Client side poll would not guarantee that the completed connection
- // has been enqueued in to the acccept queue, but the fact that the
- // listener ACKd the SYN, means that it cannot complete any new incoming
- // SYNs when it has already ACKd for > backlog number of SYNs.
- ASSERT_THAT(poll(&pfd, 1, 10000), SyscallSucceedsWithValue(1))
- << "num=" << num << " i=" << i << " kClients=" << kClients;
- ASSERT_EQ(pfd.revents, POLLOUT) << "num=" << num << " i=" << i;
- } else {
- // Now that we expect accept queue filled up, ensure that the last
- // client connection never completes with a smaller poll timeout.
- ASSERT_THAT(poll(&pfd, 1, 1000), SyscallSucceedsWithValue(0))
- << "num=" << num << " i=" << i;
- }
+ auto accept_connection = [&]() {
+ constexpr int kTimeout = 10000;
+ pollfd pfd = {
+ .fd = listen_fd.get(),
+ .events = POLLIN,
+ };
+ ASSERT_THAT(poll(&pfd, 1, kTimeout), SyscallSucceedsWithValue(1));
+ ASSERT_EQ(pfd.revents, POLLIN);
+ // Accept the connection.
+ //
+ // We have to assign a name to the accepted socket, as unnamed temporary
+ // objects are destructed upon full evaluation of the expression it is in,
+ // potentially causing the connecting socket to fail to shutdown properly.
+ auto accepted =
+ ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr));
+ };
- ASSERT_THAT(close(clients[i].release()), SyscallSucceedsWithValue(0))
- << "num=" << num << " i=" << i;
- }
- clients.clear();
- // We close the listening side and open a new listener. We could instead
- // drain the accept queue by calling accept() and reuse the listener, but
- // that is racy as the retransmitted SYNs could get ACKd as we make room in
- // the accept queue.
- ASSERT_THAT(close(listen_fd.release()), SyscallSucceedsWithValue(0));
+ // Ensure that we accept all client connections. The waiting connections would
+ // get enqueued as we drain the accept queue.
+ for (int i = 0; i < std::size(established_clients); i++) {
+ SCOPED_TRACE(absl::StrCat("established clients i=", i));
+ accept_connection();
+ }
+
+ // The waiting client connections could be in one of these 2 states:
+ // (1) SYN_SENT: if the SYN was dropped because accept queue was full
+ // (2) ESTABLISHED: if the listener sent back a SYNACK, but may have dropped
+ // the ACK from the client if the accept queue was full (send out some data to
+ // re-send that ACK, to address that case).
+ for (int i = 0; i < std::size(waiting_clients); i++) {
+ SCOPED_TRACE(absl::StrCat("waiting clients i=", i));
+ constexpr int kTimeout = 10000;
+ pollfd pfd = {
+ .fd = waiting_clients[i].get(),
+ .events = POLLOUT,
+ };
+ EXPECT_THAT(poll(&pfd, 1, kTimeout), SyscallSucceedsWithValue(1));
+ EXPECT_EQ(pfd.revents, POLLOUT);
+ char c;
+ EXPECT_THAT(RetryEINTR(send)(waiting_clients[i].get(), &c, sizeof(c), 0),
+ SyscallSucceedsWithValue(sizeof(c)));
+ accept_connection();
}
}
diff --git a/test/syscalls/linux/verity_ioctl.cc b/test/syscalls/linux/verity_ioctl.cc
new file mode 100644
index 000000000..dcd28f2c3
--- /dev/null
+++ b/test/syscalls/linux/verity_ioctl.cc
@@ -0,0 +1,133 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/mount.h>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "test/util/capability_util.h"
+#include "test/util/fs_util.h"
+#include "test/util/mount_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+#ifndef FS_IOC_ENABLE_VERITY
+#define FS_IOC_ENABLE_VERITY 1082156677
+#endif
+
+#ifndef FS_IOC_MEASURE_VERITY
+#define FS_IOC_MEASURE_VERITY 3221513862
+#endif
+
+#ifndef FS_VERITY_FL
+#define FS_VERITY_FL 1048576
+#endif
+
+#ifndef FS_IOC_GETFLAGS
+#define FS_IOC_GETFLAGS 2148034049
+#endif
+
+struct fsverity_digest {
+ __u16 digest_algorithm;
+ __u16 digest_size; /* input/output */
+ __u8 digest[];
+};
+
+const int fsverity_max_digest_size = 64;
+const int fsverity_default_digest_size = 32;
+
+class IoctlTest : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ // Verity is implemented in VFS2.
+ SKIP_IF(IsRunningWithVFS1());
+
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+ // Mount a tmpfs file system, to be wrapped by a verity fs.
+ tmpfs_dir_ = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ ASSERT_THAT(mount("", tmpfs_dir_.path().c_str(), "tmpfs", 0, ""),
+ SyscallSucceeds());
+
+ // Create a new file in the tmpfs mount.
+ constexpr char kContents[] = "foobarbaz";
+ file_ = ASSERT_NO_ERRNO_AND_VALUE(
+ TempPath::CreateFileWith(tmpfs_dir_.path(), kContents, 0777));
+ filename_ = Basename(file_.path());
+ }
+
+ TempPath tmpfs_dir_;
+ TempPath file_;
+ std::string filename_;
+};
+
+TEST_F(IoctlTest, Enable) {
+ // mount a verity fs on the existing tmpfs mount.
+ std::string mount_opts = "lower_path=" + tmpfs_dir_.path();
+ auto const verity_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ ASSERT_THAT(
+ mount("", verity_dir.path().c_str(), "verity", 0, mount_opts.c_str()),
+ SyscallSucceeds());
+
+ printf("verity path: %s, filename: %s\n", verity_dir.path().c_str(),
+ filename_.c_str());
+ fflush(nullptr);
+ // Confirm that the verity flag is absent.
+ int flag = 0;
+ auto const fd = ASSERT_NO_ERRNO_AND_VALUE(
+ Open(JoinPath(verity_dir.path(), filename_), O_RDONLY, 0777));
+ ASSERT_THAT(ioctl(fd.get(), FS_IOC_GETFLAGS, &flag), SyscallSucceeds());
+ EXPECT_EQ(flag & FS_VERITY_FL, 0);
+
+ // Enable the file and confirm that the verity flag is present.
+ ASSERT_THAT(ioctl(fd.get(), FS_IOC_ENABLE_VERITY), SyscallSucceeds());
+ ASSERT_THAT(ioctl(fd.get(), FS_IOC_GETFLAGS, &flag), SyscallSucceeds());
+ EXPECT_EQ(flag & FS_VERITY_FL, FS_VERITY_FL);
+}
+
+TEST_F(IoctlTest, Measure) {
+ // Mount a verity fs on top of the tmpfs mount created in SetUp().
+ std::string mount_opts = "lower_path=" + tmpfs_dir_.path();
+ auto const verity_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ ASSERT_THAT(
+ mount("", verity_dir.path().c_str(), "verity", 0, mount_opts.c_str()),
+ SyscallSucceeds());
+
+ // Measuring must fail with ENODATA until verity is enabled on the file.
+ auto const fd = ASSERT_NO_ERRNO_AND_VALUE(
+ Open(JoinPath(verity_dir.path(), filename_), O_RDONLY, 0777));
+ // Keep the digest in zeroed stack storage: the ASSERT_* macros return early
+ // on failure, which would leak a heap buffer that is only freed at the end.
+ alignas(struct fsverity_digest) unsigned char
+ buf[sizeof(struct fsverity_digest) + fsverity_max_digest_size] = {};
+ auto *digest = reinterpret_cast<struct fsverity_digest *>(buf);
+ digest->digest_size = fsverity_max_digest_size;
+ ASSERT_THAT(ioctl(fd.get(), FS_IOC_MEASURE_VERITY, digest),
+ SyscallFailsWithErrno(ENODATA));
+
+ // Enable verity on the file and confirm that it can now be measured.
+ ASSERT_THAT(ioctl(fd.get(), FS_IOC_ENABLE_VERITY), SyscallSucceeds());
+ ASSERT_THAT(ioctl(fd.get(), FS_IOC_MEASURE_VERITY, digest),
+ SyscallSucceeds());
+ EXPECT_EQ(digest->digest_size, fsverity_default_digest_size);
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/syscalls/linux/verity_mount.cc b/test/syscalls/linux/verity_mount.cc
new file mode 100644
index 000000000..e73dd5599
--- /dev/null
+++ b/test/syscalls/linux/verity_mount.cc
@@ -0,0 +1,53 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/mount.h>
+
+#include <iomanip>
+#include <sstream>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "test/util/capability_util.h"
+#include "test/util/temp_path.h"
+#include "test/util/test_util.h"
+
+namespace gvisor {
+namespace testing {
+
+namespace {
+
+// Mount verity file system on an existing tmpfs mount.
+TEST(MountTest, MountExisting) {
+ // Verity is implemented in VFS2.
+ SKIP_IF(IsRunningWithVFS1());
+
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+
+ // Mount a new tmpfs file system.
+ auto const tmpfs_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ ASSERT_THAT(mount("", tmpfs_dir.path().c_str(), "tmpfs", 0, ""),
+ SyscallSucceeds());
+
+ // Mount a verity file system on the existing tmpfs mount.
+ auto const verity_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+ std::string opts = "lower_path=" + tmpfs_dir.path();
+ EXPECT_THAT(mount("", verity_dir.path().c_str(), "verity", 0, opts.c_str()),
+ SyscallSucceeds());
+}
+
+} // namespace
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/util/BUILD b/test/util/BUILD
index e561f3daa..383de00ed 100644
--- a/test/util/BUILD
+++ b/test/util/BUILD
@@ -94,6 +94,7 @@ cc_library(
":file_descriptor",
":posix_error",
"@com_google_absl//absl/strings",
+ "@com_google_absl//absl/time",
gtest,
],
)
@@ -368,3 +369,20 @@ cc_library(
testonly = 1,
hdrs = ["temp_umask.h"],
)
+
+cc_library(
+ name = "cgroup_util",
+ testonly = 1,
+ srcs = ["cgroup_util.cc"],
+ hdrs = ["cgroup_util.h"],
+ deps = [
+ ":cleanup",
+ ":fs_util",
+ ":mount_util",
+ ":posix_error",
+ ":temp_path",
+ "@com_google_absl//absl/container:flat_hash_map",
+ "@com_google_absl//absl/container:flat_hash_set",
+ "@com_google_absl//absl/strings",
+ ],
+)
diff --git a/test/util/cgroup_util.cc b/test/util/cgroup_util.cc
new file mode 100644
index 000000000..65d9c4986
--- /dev/null
+++ b/test/util/cgroup_util.cc
@@ -0,0 +1,223 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test/util/cgroup_util.h"
+
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "absl/strings/str_split.h"
+#include "test/util/fs_util.h"
+#include "test/util/mount_util.h"
+
+namespace gvisor {
+namespace testing {
+
+Cgroup::Cgroup(std::string path) : cgroup_path_(path) {
+ id_ = ++Cgroup::next_id_;
+ std::cerr << absl::StreamFormat("[cg#%d] <= %s", id_, cgroup_path_)
+ << std::endl;
+}
+
+PosixErrorOr<std::string> Cgroup::ReadControlFile(
+ absl::string_view name) const {
+ std::string buf;
+ RETURN_IF_ERRNO(GetContents(Relpath(name), &buf));
+
+ const std::string alias_path = absl::StrFormat("[cg#%d]/%s", id_, name);
+ std::cerr << absl::StreamFormat("<contents of %s>", alias_path) << std::endl;
+ std::cerr << buf;
+ std::cerr << absl::StreamFormat("<end of %s>", alias_path) << std::endl;
+
+ return buf;
+}
+
+PosixErrorOr<int64_t> Cgroup::ReadIntegerControlFile(
+ absl::string_view name) const {
+ ASSIGN_OR_RETURN_ERRNO(const std::string buf, ReadControlFile(name));
+ ASSIGN_OR_RETURN_ERRNO(const int64_t val, Atoi<int64_t>(buf));
+ return val;
+}
+
+PosixErrorOr<absl::flat_hash_set<pid_t>> Cgroup::Procs() const {
+ ASSIGN_OR_RETURN_ERRNO(std::string buf, ReadControlFile("cgroup.procs"));
+ return ParsePIDList(buf);
+}
+
+PosixErrorOr<absl::flat_hash_set<pid_t>> Cgroup::Tasks() const {
+ ASSIGN_OR_RETURN_ERRNO(std::string buf, ReadControlFile("tasks"));
+ return ParsePIDList(buf);
+}
+
+PosixError Cgroup::ContainsCallingProcess() const {
+ ASSIGN_OR_RETURN_ERRNO(const absl::flat_hash_set<pid_t> procs, Procs());
+ ASSIGN_OR_RETURN_ERRNO(const absl::flat_hash_set<pid_t> tasks, Tasks());
+ const pid_t pid = getpid();
+ const pid_t tid = syscall(SYS_gettid);
+ if (!procs.contains(pid)) {
+ return PosixError(
+ ENOENT, absl::StrFormat("Cgroup doesn't contain process %d", pid));
+ }
+ if (!tasks.contains(tid)) {
+ return PosixError(ENOENT,
+ absl::StrFormat("Cgroup doesn't contain task %d", tid));
+ }
+ return NoError();
+}
+
+PosixErrorOr<absl::flat_hash_set<pid_t>> Cgroup::ParsePIDList(
+ absl::string_view data) const {
+ // Parses one decimal id per line of data; blank lines are skipped.
+ absl::flat_hash_set<pid_t> res;
+ for (absl::string_view line : absl::StrSplit(data, '\n')) {
+ if (line.empty()) {
+ continue;
+ }
+ ASSIGN_OR_RETURN_ERRNO(const int32_t pid, Atoi<int32_t>(line));
+ res.insert(static_cast<pid_t>(pid));
+ }
+ return res;
+}
+
+int64_t Cgroup::next_id_ = 0;
+
+PosixErrorOr<Cgroup> Mounter::MountCgroupfs(std::string mopts) {
+ ASSIGN_OR_RETURN_ERRNO(TempPath mountpoint,
+ TempPath::CreateDirIn(root_.path()));
+ ASSIGN_OR_RETURN_ERRNO(
+ Cleanup mount, Mount("none", mountpoint.path(), "cgroup", 0, mopts, 0));
+ const std::string mountpath = mountpoint.path();
+ std::cerr << absl::StreamFormat(
+ "Mount(\"none\", \"%s\", \"cgroup\", 0, \"%s\", 0) => OK",
+ mountpath, mopts)
+ << std::endl;
+ Cgroup cg = Cgroup(mountpath);
+ mountpoints_[cg.id()] = std::move(mountpoint);
+ mounts_[cg.id()] = std::move(mount);
+ return cg;
+}
+
+PosixError Mounter::Unmount(const Cgroup& c) {
+ auto mount = mounts_.find(c.id());
+ auto mountpoint = mountpoints_.find(c.id());
+
+ if (mount == mounts_.end() || mountpoint == mountpoints_.end()) {
+ return PosixError(
+ ESRCH, absl::StrFormat("No mount found for cgroupfs containing cg#%d",
+ c.id()));
+ }
+
+ std::cerr << absl::StreamFormat("Unmount([cg#%d])", c.id()) << std::endl;
+
+ // Simply delete the entries, their destructors will unmount and delete the
+ // mountpoint. Note the order is important to avoid errors: mount then
+ // mountpoint.
+ mounts_.erase(mount);
+ mountpoints_.erase(mountpoint);
+
+ return NoError();
+}
+
+constexpr char kProcCgroupsHeader[] =
+ "#subsys_name\thierarchy\tnum_cgroups\tenabled";
+
+PosixErrorOr<absl::flat_hash_map<std::string, CgroupsEntry>>
+ProcCgroupsEntries() {
+ std::string content;
+ RETURN_IF_ERRNO(GetContents("/proc/cgroups", &content));
+
+ bool found_header = false;
+ absl::flat_hash_map<std::string, CgroupsEntry> entries;
+ std::vector<std::string> lines = absl::StrSplit(content, '\n');
+ std::cerr << "<contents of /proc/cgroups>" << std::endl;
+ for (const std::string& line : lines) {
+ std::cerr << line << std::endl;
+ if (!found_header) {
+ EXPECT_EQ(line, kProcCgroupsHeader);
+ found_header = true;
+ continue;
+ }
+ if (line.empty()) {
+ continue;
+ }
+
+ // Parse a single entry from /proc/cgroups.
+ //
+ // Example entries, fields are tab separated in the real file:
+ //
+ // #subsys_name hierarchy num_cgroups enabled
+ // cpuset 12 35 1
+ // cpu 3 222 1
+ //
+ // Lines with fewer than four fields are rejected up front so that the
+ // fields[0..3] accesses below can never run past the end of the vector.
+ std::vector<std::string> fields =
+ absl::StrSplit(line, absl::ByAnyChar(": \t"), absl::SkipEmpty());
+ if (fields.size() < 4) {
+ return PosixError(
+ EINVAL, absl::StrFormat("Malformed /proc/cgroups entry: '%s'", line));
+ }
+ CgroupsEntry entry;
+ entry.subsys_name = fields[0];
+ ASSIGN_OR_RETURN_ERRNO(entry.hierarchy, Atoi<uint32_t>(fields[1]));
+ ASSIGN_OR_RETURN_ERRNO(entry.num_cgroups, Atoi<uint64_t>(fields[2]));
+ ASSIGN_OR_RETURN_ERRNO(const int enabled, Atoi<int>(fields[3]));
+ entry.enabled = enabled != 0;
+ entries[entry.subsys_name] = entry;
+ }
+ std::cerr << "<end of /proc/cgroups>" << std::endl;
+ return entries;
+}
+
+PosixErrorOr<absl::flat_hash_map<std::string, PIDCgroupEntry>>
+ProcPIDCgroupEntries(pid_t pid) {
+ const std::string path = absl::StrFormat("/proc/%d/cgroup", pid);
+ std::string content;
+ RETURN_IF_ERRNO(GetContents(path, &content));
+
+ absl::flat_hash_map<std::string, PIDCgroupEntry> entries;
+ std::vector<std::string> lines = absl::StrSplit(content, '\n');
+
+ std::cerr << absl::StreamFormat("<contents of %s>", path) << std::endl;
+ for (const std::string& line : lines) {
+ std::cerr << line << std::endl;
+ if (line.empty()) {
+ continue;
+ }
+
+ // Parse a single "<hierarchy>:<controllers>:<path>" entry, for example:
+ //
+ // 2:cpu:/path/to/cgroup
+ // 0::/
+ //
+ // Split on the first two ':' only and keep empty fields: the controller
+ // list may be empty, and the path is everything after the second ':'.
+ std::vector<std::string> fields =
+ absl::StrSplit(line, absl::MaxSplits(':', 2));
+ if (fields.size() != 3) {
+ return PosixError(
+ EINVAL, absl::StrFormat("Malformed entry in %s: '%s'", path, line));
+ }
+ PIDCgroupEntry entry;
+ ASSIGN_OR_RETURN_ERRNO(entry.hierarchy, Atoi<uint32_t>(fields[0]));
+ entry.controllers = fields[1];
+ entry.path = fields[2];
+ entries[entry.controllers] = entry;
+ }
+ std::cerr << absl::StreamFormat("<end of %s>", path) << std::endl;
+ return entries;
+}
+
+} // namespace testing
+} // namespace gvisor
diff --git a/test/util/cgroup_util.h b/test/util/cgroup_util.h
new file mode 100644
index 000000000..b049559df
--- /dev/null
+++ b/test/util/cgroup_util.h
@@ -0,0 +1,111 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GVISOR_TEST_UTIL_CGROUP_UTIL_H_
+#define GVISOR_TEST_UTIL_CGROUP_UTIL_H_
+
+#include <unistd.h>
+
+#include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
+#include "absl/strings/string_view.h"
+#include "test/util/cleanup.h"
+#include "test/util/fs_util.h"
+#include "test/util/temp_path.h"
+
+namespace gvisor {
+namespace testing {
+
+// Cgroup represents a cgroup directory on a mounted cgroupfs.
+class Cgroup {
+ public:
+ Cgroup(std::string path);
+
+ uint64_t id() const { return id_; }
+
+ std::string Relpath(absl::string_view leaf) const {
+ return JoinPath(cgroup_path_, leaf);
+ }
+
+ // Returns the contents of a cgroup control file with the given name.
+ PosixErrorOr<std::string> ReadControlFile(absl::string_view name) const;
+
+ // Reads the contents of a cgroup control with the given name, and attempts
+ // to parse it as an integer.
+ PosixErrorOr<int64_t> ReadIntegerControlFile(absl::string_view name) const;
+
+ // Returns the thread ids of the leaders of thread groups managed by this
+ // cgroup.
+ PosixErrorOr<absl::flat_hash_set<pid_t>> Procs() const;
+
+ PosixErrorOr<absl::flat_hash_set<pid_t>> Tasks() const;
+
+ // Checks that the calling process and thread are both part of this cgroup.
+ PosixError ContainsCallingProcess() const;
+
+ private:
+ PosixErrorOr<absl::flat_hash_set<pid_t>> ParsePIDList(
+ absl::string_view data) const;
+
+ static int64_t next_id_;
+ int64_t id_;
+ const std::string cgroup_path_;
+};
+
+// Mounter is a utility for creating cgroupfs mounts. It automatically manages
+// the lifetime of created mounts.
+class Mounter {
+ public:
+ Mounter(TempPath root) : root_(std::move(root)) {}
+
+ PosixErrorOr<Cgroup> MountCgroupfs(std::string mopts);
+
+ PosixError Unmount(const Cgroup& c);
+
+ private:
+ // The destruction order of these members avoids errors during cleanup. We
+ // first unmount (by executing the mounts_ cleanups), then delete the
+ // mountpoint subdirs, then delete the root.
+ TempPath root_;
+ absl::flat_hash_map<int64_t, TempPath> mountpoints_;
+ absl::flat_hash_map<int64_t, Cleanup> mounts_;
+};
+
+// Represents a line from /proc/cgroups.
+struct CgroupsEntry {
+ std::string subsys_name;
+ uint32_t hierarchy;
+ uint64_t num_cgroups;
+ bool enabled;
+};
+
+// Returns a parsed representation of /proc/cgroups.
+PosixErrorOr<absl::flat_hash_map<std::string, CgroupsEntry>>
+ProcCgroupsEntries();
+
+// Represents a line from /proc/<pid>/cgroup.
+struct PIDCgroupEntry {
+ uint32_t hierarchy;
+ std::string controllers;
+ std::string path;
+};
+
+// Returns a parsed representation of /proc/<pid>/cgroup.
+PosixErrorOr<absl::flat_hash_map<std::string, PIDCgroupEntry>>
+ProcPIDCgroupEntries(pid_t pid);
+
+} // namespace testing
+} // namespace gvisor
+
+#endif // GVISOR_TEST_UTIL_CGROUP_UTIL_H_
diff --git a/test/util/fs_util.cc b/test/util/fs_util.cc
index 5f1ce0d8a..483ae848d 100644
--- a/test/util/fs_util.cc
+++ b/test/util/fs_util.cc
@@ -28,6 +28,8 @@
#include "absl/strings/str_cat.h"
#include "absl/strings/str_split.h"
#include "absl/strings/string_view.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
#include "test/util/cleanup.h"
#include "test/util/file_descriptor.h"
#include "test/util/posix_error.h"
@@ -366,6 +368,48 @@ PosixErrorOr<std::vector<std::string>> ListDir(absl::string_view abspath,
return files;
}
+PosixError DirContains(absl::string_view path,
+ const std::vector<std::string>& expect,
+ const std::vector<std::string>& exclude) {
+ ASSIGN_OR_RETURN_ERRNO(auto listing, ListDir(path, false));
+
+ for (auto& expected_entry : expect) {
+ auto cursor = std::find(listing.begin(), listing.end(), expected_entry);
+ if (cursor == listing.end()) {
+ return PosixError(ENOENT, absl::StrFormat("Failed to find '%s' in '%s'",
+ expected_entry, path));
+ }
+ }
+ for (auto& excluded_entry : exclude) {
+ auto cursor = std::find(listing.begin(), listing.end(), excluded_entry);
+ if (cursor != listing.end()) {
+ return PosixError(ENOENT, absl::StrCat("File '", excluded_entry,
+ "' found in path '", path, "'"));
+ }
+ }
+ return NoError();
+}
+
+PosixError EventuallyDirContains(absl::string_view path,
+ const std::vector<std::string>& expect,
+ const std::vector<std::string>& exclude) {
+ constexpr int kRetryCount = 100;
+ const absl::Duration kRetryDelay = absl::Milliseconds(100);
+
+ for (int i = 0; i < kRetryCount; ++i) {
+ auto res = DirContains(path, expect, exclude);
+ if (res.ok()) {
+ return res;
+ }
+ if (i < kRetryCount - 1) {
+ // Sleep if this isn't the final iteration.
+ absl::SleepFor(kRetryDelay);
+ }
+ }
+ return PosixError(ETIMEDOUT,
+ "Timed out while waiting for directory to contain files ");
+}
+
PosixError RecursivelyDelete(absl::string_view path, int* undeleted_dirs,
int* undeleted_files) {
ASSIGN_OR_RETURN_ERRNO(bool exists, Exists(path));
diff --git a/test/util/fs_util.h b/test/util/fs_util.h
index 2190c3bca..bb2d1d3c8 100644
--- a/test/util/fs_util.h
+++ b/test/util/fs_util.h
@@ -129,6 +129,18 @@ PosixError WalkTree(
PosixErrorOr<std::vector<std::string>> ListDir(absl::string_view abspath,
bool skipdots);
+// Check that a directory contains children nodes named in expect, and does not
+// contain any children nodes named in exclude.
+PosixError DirContains(absl::string_view path,
+ const std::vector<std::string>& expect,
+ const std::vector<std::string>& exclude);
+
+// Same as DirContains, but adds a retry. Suitable for checking a directory
+// being modified asynchronously.
+PosixError EventuallyDirContains(absl::string_view path,
+ const std::vector<std::string>& expect,
+ const std::vector<std::string>& exclude);
+
// Attempt to recursively delete a directory or file. Returns an error and
// the number of undeleted directories and files. If either
// undeleted_dirs or undeleted_files is nullptr then it will not be used.
diff --git a/tools/go_marshal/defs.bzl b/tools/go_marshal/defs.bzl
index f44f83eab..e23901815 100644
--- a/tools/go_marshal/defs.bzl
+++ b/tools/go_marshal/defs.bzl
@@ -58,7 +58,7 @@ go_marshal = rule(
marshal_deps = [
"//pkg/gohacks",
"//pkg/safecopy",
- "//pkg/usermem",
+ "//pkg/hostarch",
"//pkg/marshal",
]
diff --git a/tools/go_marshal/gomarshal/generator.go b/tools/go_marshal/gomarshal/generator.go
index 39394d2a7..0e2d752cb 100644
--- a/tools/go_marshal/gomarshal/generator.go
+++ b/tools/go_marshal/gomarshal/generator.go
@@ -113,7 +113,7 @@ func NewGenerator(srcs []string, out, outTest, outTestUnconditional, pkg string,
g.imports.add("unsafe")
g.imports.add("gvisor.dev/gvisor/pkg/gohacks")
g.imports.add("gvisor.dev/gvisor/pkg/safecopy")
- g.imports.add("gvisor.dev/gvisor/pkg/usermem")
+ g.imports.add("gvisor.dev/gvisor/pkg/hostarch")
g.imports.add("gvisor.dev/gvisor/pkg/marshal")
return &g, nil
diff --git a/tools/go_marshal/gomarshal/generator_interfaces.go b/tools/go_marshal/gomarshal/generator_interfaces.go
index 65f5ea34d..3e643e77f 100644
--- a/tools/go_marshal/gomarshal/generator_interfaces.go
+++ b/tools/go_marshal/gomarshal/generator_interfaces.go
@@ -120,16 +120,16 @@ func (g *interfaceGenerator) marshalScalar(accessor, typ, bufVar string) {
g.emit("%s[0] = byte(%s)\n", bufVar, accessor)
g.shift(bufVar, 1)
case "int16", "uint16":
- g.recordUsedImport("usermem")
- g.emit("usermem.ByteOrder.PutUint16(%s[:2], uint16(%s))\n", bufVar, accessor)
+ g.recordUsedImport("hostarch")
+ g.emit("hostarch.ByteOrder.PutUint16(%s[:2], uint16(%s))\n", bufVar, accessor)
g.shift(bufVar, 2)
case "int32", "uint32":
- g.recordUsedImport("usermem")
- g.emit("usermem.ByteOrder.PutUint32(%s[:4], uint32(%s))\n", bufVar, accessor)
+ g.recordUsedImport("hostarch")
+ g.emit("hostarch.ByteOrder.PutUint32(%s[:4], uint32(%s))\n", bufVar, accessor)
g.shift(bufVar, 4)
case "int64", "uint64":
- g.recordUsedImport("usermem")
- g.emit("usermem.ByteOrder.PutUint64(%s[:8], uint64(%s))\n", bufVar, accessor)
+ g.recordUsedImport("hostarch")
+ g.emit("hostarch.ByteOrder.PutUint64(%s[:8], uint64(%s))\n", bufVar, accessor)
g.shift(bufVar, 8)
default:
g.emit("%s.MarshalBytes(%s[:%s.SizeBytes()])\n", accessor, bufVar, accessor)
@@ -147,16 +147,16 @@ func (g *interfaceGenerator) unmarshalScalar(accessor, typ, bufVar string) {
g.emit("%s = %s(%s[0])\n", accessor, typ, bufVar)
g.shift(bufVar, 1)
case "int16", "uint16":
- g.recordUsedImport("usermem")
- g.emit("%s = %s(usermem.ByteOrder.Uint16(%s[:2]))\n", accessor, typ, bufVar)
+ g.recordUsedImport("hostarch")
+ g.emit("%s = %s(hostarch.ByteOrder.Uint16(%s[:2]))\n", accessor, typ, bufVar)
g.shift(bufVar, 2)
case "int32", "uint32":
- g.recordUsedImport("usermem")
- g.emit("%s = %s(usermem.ByteOrder.Uint32(%s[:4]))\n", accessor, typ, bufVar)
+ g.recordUsedImport("hostarch")
+ g.emit("%s = %s(hostarch.ByteOrder.Uint32(%s[:4]))\n", accessor, typ, bufVar)
g.shift(bufVar, 4)
case "int64", "uint64":
- g.recordUsedImport("usermem")
- g.emit("%s = %s(usermem.ByteOrder.Uint64(%s[:8]))\n", accessor, typ, bufVar)
+ g.recordUsedImport("hostarch")
+ g.emit("%s = %s(hostarch.ByteOrder.Uint64(%s[:8]))\n", accessor, typ, bufVar)
g.shift(bufVar, 8)
default:
g.emit("%s.UnmarshalBytes(%s[:%s.SizeBytes()])\n", accessor, bufVar, accessor)
diff --git a/tools/go_marshal/gomarshal/generator_interfaces_array_newtype.go b/tools/go_marshal/gomarshal/generator_interfaces_array_newtype.go
index 7525b52da..32afece2e 100644
--- a/tools/go_marshal/gomarshal/generator_interfaces_array_newtype.go
+++ b/tools/go_marshal/gomarshal/generator_interfaces_array_newtype.go
@@ -39,7 +39,7 @@ func (g *interfaceGenerator) emitMarshallableForArrayNewtype(n *ast.Ident, a *as
g.recordUsedImport("runtime")
g.recordUsedImport("safecopy")
g.recordUsedImport("unsafe")
- g.recordUsedImport("usermem")
+ g.recordUsedImport("hostarch")
lenExpr := g.arrayLenExpr(a)
@@ -102,7 +102,7 @@ func (g *interfaceGenerator) emitMarshallableForArrayNewtype(n *ast.Ident, a *as
g.emit("// CopyOutN implements marshal.Marshallable.CopyOutN.\n")
g.emit("//go:nosplit\n")
- g.emit("func (%s *%s) CopyOutN(cc marshal.CopyContext, addr usermem.Addr, limit int) (int, error) {\n", g.r, g.typeName())
+ g.emit("func (%s *%s) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) {\n", g.r, g.typeName())
g.inIndent(func() {
g.emitCastToByteSlice(g.r, "buf", fmt.Sprintf("%s.SizeBytes()", g.r))
@@ -114,7 +114,7 @@ func (g *interfaceGenerator) emitMarshallableForArrayNewtype(n *ast.Ident, a *as
g.emit("// CopyOut implements marshal.Marshallable.CopyOut.\n")
g.emit("//go:nosplit\n")
- g.emit("func (%s *%s) CopyOut(cc marshal.CopyContext, addr usermem.Addr) (int, error) {\n", g.r, g.typeName())
+ g.emit("func (%s *%s) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {\n", g.r, g.typeName())
g.inIndent(func() {
g.emit("return %s.CopyOutN(cc, addr, %s.SizeBytes())\n", g.r, g.r)
})
@@ -122,7 +122,7 @@ func (g *interfaceGenerator) emitMarshallableForArrayNewtype(n *ast.Ident, a *as
g.emit("// CopyIn implements marshal.Marshallable.CopyIn.\n")
g.emit("//go:nosplit\n")
- g.emit("func (%s *%s) CopyIn(cc marshal.CopyContext, addr usermem.Addr) (int, error) {\n", g.r, g.typeName())
+ g.emit("func (%s *%s) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {\n", g.r, g.typeName())
g.inIndent(func() {
g.emitCastToByteSlice(g.r, "buf", fmt.Sprintf("%s.SizeBytes()", g.r))
diff --git a/tools/go_marshal/gomarshal/generator_interfaces_dynamic.go b/tools/go_marshal/gomarshal/generator_interfaces_dynamic.go
index b1a8622cd..345020ddc 100644
--- a/tools/go_marshal/gomarshal/generator_interfaces_dynamic.go
+++ b/tools/go_marshal/gomarshal/generator_interfaces_dynamic.go
@@ -46,8 +46,8 @@ func (g *interfaceGenerator) emitMarshallableForDynamicType() {
g.emit("// CopyOutN implements marshal.Marshallable.CopyOutN.\n")
g.emit("//go:nosplit\n")
g.recordUsedImport("marshal")
- g.recordUsedImport("usermem")
- g.emit("func (%s *%s) CopyOutN(cc marshal.CopyContext, addr usermem.Addr, limit int) (int, error) {\n", g.r, g.typeName())
+ g.recordUsedImport("hostarch")
+ g.emit("func (%s *%s) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) {\n", g.r, g.typeName())
g.inIndent(func() {
g.emit("// Type %s doesn't have a packed layout in memory, fall back to MarshalBytes.\n", g.typeName())
g.emit("buf := cc.CopyScratchBuffer(%s.SizeBytes()) // escapes: okay.\n", g.r)
@@ -59,8 +59,8 @@ func (g *interfaceGenerator) emitMarshallableForDynamicType() {
g.emit("// CopyOut implements marshal.Marshallable.CopyOut.\n")
g.emit("//go:nosplit\n")
g.recordUsedImport("marshal")
- g.recordUsedImport("usermem")
- g.emit("func (%s *%s) CopyOut(cc marshal.CopyContext, addr usermem.Addr) (int, error) {\n", g.r, g.typeName())
+ g.recordUsedImport("hostarch")
+ g.emit("func (%s *%s) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {\n", g.r, g.typeName())
g.inIndent(func() {
g.emit("return %s.CopyOutN(cc, addr, %s.SizeBytes())\n", g.r, g.r)
})
@@ -69,8 +69,8 @@ func (g *interfaceGenerator) emitMarshallableForDynamicType() {
g.emit("// CopyIn implements marshal.Marshallable.CopyIn.\n")
g.emit("//go:nosplit\n")
g.recordUsedImport("marshal")
- g.recordUsedImport("usermem")
- g.emit("func (%s *%s) CopyIn(cc marshal.CopyContext, addr usermem.Addr) (int, error) {\n", g.r, g.typeName())
+ g.recordUsedImport("hostarch")
+ g.emit("func (%s *%s) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {\n", g.r, g.typeName())
g.inIndent(func() {
g.emit("// Type %s doesn't have a packed layout in memory, fall back to UnmarshalBytes.\n", g.typeName())
g.emit("buf := cc.CopyScratchBuffer(%s.SizeBytes()) // escapes: okay.\n", g.r)
diff --git a/tools/go_marshal/gomarshal/generator_interfaces_primitive_newtype.go b/tools/go_marshal/gomarshal/generator_interfaces_primitive_newtype.go
index 7edaf666c..05f0e0db4 100644
--- a/tools/go_marshal/gomarshal/generator_interfaces_primitive_newtype.go
+++ b/tools/go_marshal/gomarshal/generator_interfaces_primitive_newtype.go
@@ -29,14 +29,14 @@ func (g *interfaceGenerator) marshalPrimitiveScalar(accessor, typ, bufVar string
case "int8", "uint8", "byte":
g.emit("%s[0] = byte(*%s)\n", bufVar, accessor)
case "int16", "uint16":
- g.recordUsedImport("usermem")
- g.emit("usermem.ByteOrder.PutUint16(%s[:2], uint16(*%s))\n", bufVar, accessor)
+ g.recordUsedImport("hostarch")
+ g.emit("hostarch.ByteOrder.PutUint16(%s[:2], uint16(*%s))\n", bufVar, accessor)
case "int32", "uint32":
- g.recordUsedImport("usermem")
- g.emit("usermem.ByteOrder.PutUint32(%s[:4], uint32(*%s))\n", bufVar, accessor)
+ g.recordUsedImport("hostarch")
+ g.emit("hostarch.ByteOrder.PutUint32(%s[:4], uint32(*%s))\n", bufVar, accessor)
case "int64", "uint64":
- g.recordUsedImport("usermem")
- g.emit("usermem.ByteOrder.PutUint64(%s[:8], uint64(*%s))\n", bufVar, accessor)
+ g.recordUsedImport("hostarch")
+ g.emit("hostarch.ByteOrder.PutUint64(%s[:8], uint64(*%s))\n", bufVar, accessor)
default:
g.emit("// Explicilty cast to the underlying type before dispatching to\n")
g.emit("// MarshalBytes, so we don't recursively call %s.MarshalBytes\n", accessor)
@@ -53,14 +53,14 @@ func (g *interfaceGenerator) unmarshalPrimitiveScalar(accessor, typ, bufVar, typ
case "int8", "uint8":
g.emit("*%s = %s(%s(%s[0]))\n", accessor, typeCast, typ, bufVar)
case "int16", "uint16":
- g.recordUsedImport("usermem")
- g.emit("*%s = %s(%s(usermem.ByteOrder.Uint16(%s[:2])))\n", accessor, typeCast, typ, bufVar)
+ g.recordUsedImport("hostarch")
+ g.emit("*%s = %s(%s(hostarch.ByteOrder.Uint16(%s[:2])))\n", accessor, typeCast, typ, bufVar)
case "int32", "uint32":
- g.recordUsedImport("usermem")
- g.emit("*%s = %s(%s(usermem.ByteOrder.Uint32(%s[:4])))\n", accessor, typeCast, typ, bufVar)
+ g.recordUsedImport("hostarch")
+ g.emit("*%s = %s(%s(hostarch.ByteOrder.Uint32(%s[:4])))\n", accessor, typeCast, typ, bufVar)
case "int64", "uint64":
- g.recordUsedImport("usermem")
- g.emit("*%s = %s(%s(usermem.ByteOrder.Uint64(%s[:8])))\n", accessor, typeCast, typ, bufVar)
+ g.recordUsedImport("hostarch")
+ g.emit("*%s = %s(%s(hostarch.ByteOrder.Uint64(%s[:8])))\n", accessor, typeCast, typ, bufVar)
default:
g.emit("// Explicilty cast to the underlying type before dispatching to\n")
g.emit("// UnmarshalBytes, so we don't recursively call %s.UnmarshalBytes\n", accessor)
@@ -101,7 +101,7 @@ func (g *interfaceGenerator) emitMarshallableForPrimitiveNewtype(nt *ast.Ident)
g.recordUsedImport("runtime")
g.recordUsedImport("safecopy")
g.recordUsedImport("unsafe")
- g.recordUsedImport("usermem")
+ g.recordUsedImport("hostarch")
g.emit("// SizeBytes implements marshal.Marshallable.SizeBytes.\n")
g.emit("//go:nosplit\n")
@@ -154,7 +154,7 @@ func (g *interfaceGenerator) emitMarshallableForPrimitiveNewtype(nt *ast.Ident)
g.emit("// CopyOutN implements marshal.Marshallable.CopyOutN.\n")
g.emit("//go:nosplit\n")
- g.emit("func (%s *%s) CopyOutN(cc marshal.CopyContext, addr usermem.Addr, limit int) (int, error) {\n", g.r, g.typeName())
+ g.emit("func (%s *%s) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) {\n", g.r, g.typeName())
g.inIndent(func() {
g.emitCastToByteSlice(g.r, "buf", fmt.Sprintf("%s.SizeBytes()", g.r))
@@ -166,7 +166,7 @@ func (g *interfaceGenerator) emitMarshallableForPrimitiveNewtype(nt *ast.Ident)
g.emit("// CopyOut implements marshal.Marshallable.CopyOut.\n")
g.emit("//go:nosplit\n")
- g.emit("func (%s *%s) CopyOut(cc marshal.CopyContext, addr usermem.Addr) (int, error) {\n", g.r, g.typeName())
+ g.emit("func (%s *%s) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {\n", g.r, g.typeName())
g.inIndent(func() {
g.emit("return %s.CopyOutN(cc, addr, %s.SizeBytes())\n", g.r, g.r)
})
@@ -174,7 +174,7 @@ func (g *interfaceGenerator) emitMarshallableForPrimitiveNewtype(nt *ast.Ident)
g.emit("// CopyIn implements marshal.Marshallable.CopyIn.\n")
g.emit("//go:nosplit\n")
- g.emit("func (%s *%s) CopyIn(cc marshal.CopyContext, addr usermem.Addr) (int, error) {\n", g.r, g.typeName())
+ g.emit("func (%s *%s) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {\n", g.r, g.typeName())
g.inIndent(func() {
g.emitCastToByteSlice(g.r, "buf", fmt.Sprintf("%s.SizeBytes()", g.r))
@@ -199,7 +199,7 @@ func (g *interfaceGenerator) emitMarshallableForPrimitiveNewtype(nt *ast.Ident)
func (g *interfaceGenerator) emitMarshallableSliceForPrimitiveNewtype(nt *ast.Ident, slice *sliceAPI) {
g.recordUsedImport("marshal")
- g.recordUsedImport("usermem")
+ g.recordUsedImport("hostarch")
g.recordUsedImport("reflect")
g.recordUsedImport("runtime")
g.recordUsedImport("unsafe")
@@ -211,7 +211,7 @@ func (g *interfaceGenerator) emitMarshallableSliceForPrimitiveNewtype(nt *ast.Id
g.emit("// Copy%sIn copies in a slice of %s objects from the task's memory.\n", slice.ident, eltType)
g.emit("//go:nosplit\n")
- g.emit("func Copy%sIn(cc marshal.CopyContext, addr usermem.Addr, dst []%s) (int, error) {\n", slice.ident, eltType)
+ g.emit("func Copy%sIn(cc marshal.CopyContext, addr hostarch.Addr, dst []%s) (int, error) {\n", slice.ident, eltType)
g.inIndent(func() {
g.emit("count := len(dst)\n")
g.emit("if count == 0 {\n")
@@ -231,7 +231,7 @@ func (g *interfaceGenerator) emitMarshallableSliceForPrimitiveNewtype(nt *ast.Id
g.emit("// Copy%sOut copies a slice of %s objects to the task's memory.\n", slice.ident, eltType)
g.emit("//go:nosplit\n")
- g.emit("func Copy%sOut(cc marshal.CopyContext, addr usermem.Addr, src []%s) (int, error) {\n", slice.ident, eltType)
+ g.emit("func Copy%sOut(cc marshal.CopyContext, addr hostarch.Addr, src []%s) (int, error) {\n", slice.ident, eltType)
g.inIndent(func() {
g.emit("count := len(src)\n")
g.emit("if count == 0 {\n")
diff --git a/tools/go_marshal/gomarshal/generator_interfaces_struct.go b/tools/go_marshal/gomarshal/generator_interfaces_struct.go
index 5f6306b8f..72df1ab64 100644
--- a/tools/go_marshal/gomarshal/generator_interfaces_struct.go
+++ b/tools/go_marshal/gomarshal/generator_interfaces_struct.go
@@ -319,8 +319,8 @@ func (g *interfaceGenerator) emitMarshallableForStruct(st *ast.StructType) {
g.emit("// CopyOutN implements marshal.Marshallable.CopyOutN.\n")
g.emit("//go:nosplit\n")
g.recordUsedImport("marshal")
- g.recordUsedImport("usermem")
- g.emit("func (%s *%s) CopyOutN(cc marshal.CopyContext, addr usermem.Addr, limit int) (int, error) {\n", g.r, g.typeName())
+ g.recordUsedImport("hostarch")
+ g.emit("func (%s *%s) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) {\n", g.r, g.typeName())
g.inIndent(func() {
fallback := func() {
g.emit("// Type %s doesn't have a packed layout in memory, fall back to MarshalBytes.\n", g.typeName())
@@ -352,8 +352,8 @@ func (g *interfaceGenerator) emitMarshallableForStruct(st *ast.StructType) {
g.emit("// CopyOut implements marshal.Marshallable.CopyOut.\n")
g.emit("//go:nosplit\n")
g.recordUsedImport("marshal")
- g.recordUsedImport("usermem")
- g.emit("func (%s *%s) CopyOut(cc marshal.CopyContext, addr usermem.Addr) (int, error) {\n", g.r, g.typeName())
+ g.recordUsedImport("hostarch")
+ g.emit("func (%s *%s) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {\n", g.r, g.typeName())
g.inIndent(func() {
g.emit("return %s.CopyOutN(cc, addr, %s.SizeBytes())\n", g.r, g.r)
})
@@ -362,8 +362,8 @@ func (g *interfaceGenerator) emitMarshallableForStruct(st *ast.StructType) {
g.emit("// CopyIn implements marshal.Marshallable.CopyIn.\n")
g.emit("//go:nosplit\n")
g.recordUsedImport("marshal")
- g.recordUsedImport("usermem")
- g.emit("func (%s *%s) CopyIn(cc marshal.CopyContext, addr usermem.Addr) (int, error) {\n", g.r, g.typeName())
+ g.recordUsedImport("hostarch")
+ g.emit("func (%s *%s) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {\n", g.r, g.typeName())
g.inIndent(func() {
fallback := func() {
g.emit("// Type %s doesn't have a packed layout in memory, fall back to UnmarshalBytes.\n", g.typeName())
@@ -436,10 +436,10 @@ func (g *interfaceGenerator) emitMarshallableSliceForStruct(st *ast.StructType,
}
g.recordUsedImport("marshal")
- g.recordUsedImport("usermem")
+ g.recordUsedImport("hostarch")
g.emit("// Copy%sIn copies in a slice of %s objects from the task's memory.\n", slice.ident, g.typeName())
- g.emit("func Copy%sIn(cc marshal.CopyContext, addr usermem.Addr, dst []%s) (int, error) {\n", slice.ident, g.typeName())
+ g.emit("func Copy%sIn(cc marshal.CopyContext, addr hostarch.Addr, dst []%s) (int, error) {\n", slice.ident, g.typeName())
g.inIndent(func() {
g.emit("count := len(dst)\n")
g.emit("if count == 0 {\n")
@@ -496,7 +496,7 @@ func (g *interfaceGenerator) emitMarshallableSliceForStruct(st *ast.StructType,
g.emit("}\n\n")
g.emit("// Copy%sOut copies a slice of %s objects to the task's memory.\n", slice.ident, g.typeName())
- g.emit("func Copy%sOut(cc marshal.CopyContext, addr usermem.Addr, src []%s) (int, error) {\n", slice.ident, g.typeName())
+ g.emit("func Copy%sOut(cc marshal.CopyContext, addr hostarch.Addr, src []%s) (int, error) {\n", slice.ident, g.typeName())
g.inIndent(func() {
g.emit("count := len(src)\n")
g.emit("if count == 0 {\n")
diff --git a/tools/go_marshal/gomarshal/generator_tests.go b/tools/go_marshal/gomarshal/generator_tests.go
index 6cf00843f..8f93a1de5 100644
--- a/tools/go_marshal/gomarshal/generator_tests.go
+++ b/tools/go_marshal/gomarshal/generator_tests.go
@@ -32,7 +32,7 @@ var standardImports = []string{
var sliceAPIImports = []string{
"encoding/binary",
- "gvisor.dev/gvisor/pkg/usermem",
+ "gvisor.dev/gvisor/pkg/hostarch",
}
type testGenerator struct {
@@ -143,7 +143,7 @@ func (g *testGenerator) emitTestMarshalUnmarshalPreservesData() {
}
func (g *testGenerator) emitTestMarshalUnmarshalSlicePreservesData(slice *sliceAPI) {
- for _, name := range []string{"binary", "usermem"} {
+ for _, name := range []string{"binary", "hostarch"} {
if !g.imports.markUsed(name) {
panic(fmt.Sprintf("Generated test for '%s' referenced a non-existent import with local name '%s'", g.typeName(), name))
}
@@ -155,7 +155,7 @@ func (g *testGenerator) emitTestMarshalUnmarshalSlicePreservesData(slice *sliceA
g.emit("size := (*%s)(nil).SizeBytes() * len(x)\n", g.typeName())
g.emit("buf := bytes.NewBuffer(make([]byte, size))\n")
g.emit("buf.Reset()\n")
- g.emit("if err := binary.Write(buf, usermem.ByteOrder, x[:]); err != nil {\n")
+ g.emit("if err := binary.Write(buf, hostarch.ByteOrder, x[:]); err != nil {\n")
g.inIndent(func() {
g.emit("t.Fatal(fmt.Sprintf(\"binary.Write failed: %v\", err))\n")
})
diff --git a/tools/go_marshal/test/BUILD b/tools/go_marshal/test/BUILD
index 5bceacd32..e872560a9 100644
--- a/tools/go_marshal/test/BUILD
+++ b/tools/go_marshal/test/BUILD
@@ -15,7 +15,7 @@ go_test(
deps = [
":test",
"//pkg/binary",
- "//pkg/usermem",
+ "//pkg/hostarch",
"//tools/go_marshal/analysis",
],
)
@@ -41,6 +41,7 @@ go_test(
srcs = ["marshal_test.go"],
deps = [
":test",
+ "//pkg/hostarch",
"//pkg/marshal",
"//pkg/marshal/primitive",
"//pkg/syserror",
diff --git a/tools/go_marshal/test/benchmark_test.go b/tools/go_marshal/test/benchmark_test.go
index 224d308c7..16f478ff7 100644
--- a/tools/go_marshal/test/benchmark_test.go
+++ b/tools/go_marshal/test/benchmark_test.go
@@ -22,7 +22,7 @@ import (
"testing"
"gvisor.dev/gvisor/pkg/binary"
- "gvisor.dev/gvisor/pkg/usermem"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/tools/go_marshal/analysis"
"gvisor.dev/gvisor/tools/go_marshal/test"
)
@@ -39,10 +39,10 @@ func BenchmarkEncodingBinary(b *testing.B) {
for n := 0; n < b.N; n++ {
buf := bytes.NewBuffer(make([]byte, size))
buf.Reset()
- if err := encbin.Write(buf, usermem.ByteOrder, &s1); err != nil {
+ if err := encbin.Write(buf, hostarch.ByteOrder, &s1); err != nil {
b.Error("Write:", err)
}
- if err := encbin.Read(buf, usermem.ByteOrder, &s2); err != nil {
+ if err := encbin.Read(buf, hostarch.ByteOrder, &s2); err != nil {
b.Error("Read:", err)
}
}
@@ -66,8 +66,8 @@ func BenchmarkBinary(b *testing.B) {
for n := 0; n < b.N; n++ {
buf := make([]byte, 0, size)
- buf = binary.Marshal(buf, usermem.ByteOrder, &s1)
- binary.Unmarshal(buf, usermem.ByteOrder, &s2)
+ buf = binary.Marshal(buf, hostarch.ByteOrder, &s1)
+ binary.Unmarshal(buf, hostarch.ByteOrder, &s2)
}
b.StopTimer()
@@ -89,42 +89,42 @@ func BenchmarkMarshalManual(b *testing.B) {
buf := make([]byte, 0, s1.SizeBytes())
// Marshal
- buf = binary.AppendUint64(buf, usermem.ByteOrder, s1.Dev)
- buf = binary.AppendUint64(buf, usermem.ByteOrder, s1.Ino)
- buf = binary.AppendUint64(buf, usermem.ByteOrder, s1.Nlink)
- buf = binary.AppendUint32(buf, usermem.ByteOrder, s1.Mode)
- buf = binary.AppendUint32(buf, usermem.ByteOrder, s1.UID)
- buf = binary.AppendUint32(buf, usermem.ByteOrder, s1.GID)
- buf = binary.AppendUint32(buf, usermem.ByteOrder, 0)
- buf = binary.AppendUint64(buf, usermem.ByteOrder, s1.Rdev)
- buf = binary.AppendUint64(buf, usermem.ByteOrder, uint64(s1.Size))
- buf = binary.AppendUint64(buf, usermem.ByteOrder, uint64(s1.Blksize))
- buf = binary.AppendUint64(buf, usermem.ByteOrder, uint64(s1.Blocks))
- buf = binary.AppendUint64(buf, usermem.ByteOrder, uint64(s1.ATime.Sec))
- buf = binary.AppendUint64(buf, usermem.ByteOrder, uint64(s1.ATime.Nsec))
- buf = binary.AppendUint64(buf, usermem.ByteOrder, uint64(s1.MTime.Sec))
- buf = binary.AppendUint64(buf, usermem.ByteOrder, uint64(s1.MTime.Nsec))
- buf = binary.AppendUint64(buf, usermem.ByteOrder, uint64(s1.CTime.Sec))
- buf = binary.AppendUint64(buf, usermem.ByteOrder, uint64(s1.CTime.Nsec))
+ buf = binary.AppendUint64(buf, hostarch.ByteOrder, s1.Dev)
+ buf = binary.AppendUint64(buf, hostarch.ByteOrder, s1.Ino)
+ buf = binary.AppendUint64(buf, hostarch.ByteOrder, s1.Nlink)
+ buf = binary.AppendUint32(buf, hostarch.ByteOrder, s1.Mode)
+ buf = binary.AppendUint32(buf, hostarch.ByteOrder, s1.UID)
+ buf = binary.AppendUint32(buf, hostarch.ByteOrder, s1.GID)
+ buf = binary.AppendUint32(buf, hostarch.ByteOrder, 0)
+ buf = binary.AppendUint64(buf, hostarch.ByteOrder, s1.Rdev)
+ buf = binary.AppendUint64(buf, hostarch.ByteOrder, uint64(s1.Size))
+ buf = binary.AppendUint64(buf, hostarch.ByteOrder, uint64(s1.Blksize))
+ buf = binary.AppendUint64(buf, hostarch.ByteOrder, uint64(s1.Blocks))
+ buf = binary.AppendUint64(buf, hostarch.ByteOrder, uint64(s1.ATime.Sec))
+ buf = binary.AppendUint64(buf, hostarch.ByteOrder, uint64(s1.ATime.Nsec))
+ buf = binary.AppendUint64(buf, hostarch.ByteOrder, uint64(s1.MTime.Sec))
+ buf = binary.AppendUint64(buf, hostarch.ByteOrder, uint64(s1.MTime.Nsec))
+ buf = binary.AppendUint64(buf, hostarch.ByteOrder, uint64(s1.CTime.Sec))
+ buf = binary.AppendUint64(buf, hostarch.ByteOrder, uint64(s1.CTime.Nsec))
// Unmarshal
- s2.Dev = usermem.ByteOrder.Uint64(buf[0:8])
- s2.Ino = usermem.ByteOrder.Uint64(buf[8:16])
- s2.Nlink = usermem.ByteOrder.Uint64(buf[16:24])
- s2.Mode = usermem.ByteOrder.Uint32(buf[24:28])
- s2.UID = usermem.ByteOrder.Uint32(buf[28:32])
- s2.GID = usermem.ByteOrder.Uint32(buf[32:36])
+ s2.Dev = hostarch.ByteOrder.Uint64(buf[0:8])
+ s2.Ino = hostarch.ByteOrder.Uint64(buf[8:16])
+ s2.Nlink = hostarch.ByteOrder.Uint64(buf[16:24])
+ s2.Mode = hostarch.ByteOrder.Uint32(buf[24:28])
+ s2.UID = hostarch.ByteOrder.Uint32(buf[28:32])
+ s2.GID = hostarch.ByteOrder.Uint32(buf[32:36])
// Padding: buf[36:40]
- s2.Rdev = usermem.ByteOrder.Uint64(buf[40:48])
- s2.Size = int64(usermem.ByteOrder.Uint64(buf[48:56]))
- s2.Blksize = int64(usermem.ByteOrder.Uint64(buf[56:64]))
- s2.Blocks = int64(usermem.ByteOrder.Uint64(buf[64:72]))
- s2.ATime.Sec = int64(usermem.ByteOrder.Uint64(buf[72:80]))
- s2.ATime.Nsec = int64(usermem.ByteOrder.Uint64(buf[80:88]))
- s2.MTime.Sec = int64(usermem.ByteOrder.Uint64(buf[88:96]))
- s2.MTime.Nsec = int64(usermem.ByteOrder.Uint64(buf[96:104]))
- s2.CTime.Sec = int64(usermem.ByteOrder.Uint64(buf[104:112]))
- s2.CTime.Nsec = int64(usermem.ByteOrder.Uint64(buf[112:120]))
+ s2.Rdev = hostarch.ByteOrder.Uint64(buf[40:48])
+ s2.Size = int64(hostarch.ByteOrder.Uint64(buf[48:56]))
+ s2.Blksize = int64(hostarch.ByteOrder.Uint64(buf[56:64]))
+ s2.Blocks = int64(hostarch.ByteOrder.Uint64(buf[64:72]))
+ s2.ATime.Sec = int64(hostarch.ByteOrder.Uint64(buf[72:80]))
+ s2.ATime.Nsec = int64(hostarch.ByteOrder.Uint64(buf[80:88]))
+ s2.MTime.Sec = int64(hostarch.ByteOrder.Uint64(buf[88:96]))
+ s2.MTime.Nsec = int64(hostarch.ByteOrder.Uint64(buf[96:104]))
+ s2.CTime.Sec = int64(hostarch.ByteOrder.Uint64(buf[104:112]))
+ s2.CTime.Nsec = int64(hostarch.ByteOrder.Uint64(buf[112:120]))
}
b.StopTimer()
@@ -187,8 +187,8 @@ func BenchmarkBinarySlice(b *testing.B) {
for n := 0; n < b.N; n++ {
buf := make([]byte, 0, size)
- buf = binary.Marshal(buf, usermem.ByteOrder, &s1)
- binary.Unmarshal(buf, usermem.ByteOrder, &s2)
+ buf = binary.Marshal(buf, hostarch.ByteOrder, &s1)
+ binary.Unmarshal(buf, hostarch.ByteOrder, &s2)
}
b.StopTimer()
diff --git a/tools/go_marshal/test/escape/BUILD b/tools/go_marshal/test/escape/BUILD
index 2981ef196..62e0b4665 100644
--- a/tools/go_marshal/test/escape/BUILD
+++ b/tools/go_marshal/test/escape/BUILD
@@ -7,8 +7,8 @@ go_library(
testonly = 1,
srcs = ["escape.go"],
deps = [
+ "//pkg/hostarch",
"//pkg/marshal",
- "//pkg/usermem",
"//tools/go_marshal/test",
],
)
diff --git a/tools/go_marshal/test/escape/escape.go b/tools/go_marshal/test/escape/escape.go
index df14ae98e..1ac606862 100644
--- a/tools/go_marshal/test/escape/escape.go
+++ b/tools/go_marshal/test/escape/escape.go
@@ -16,8 +16,8 @@
package escape
import (
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/marshal"
- "gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/tools/go_marshal/test"
)
@@ -29,21 +29,21 @@ func (*dummyCopyContext) CopyScratchBuffer(size int) []byte {
return make([]byte, size)
}
-func (*dummyCopyContext) CopyOutBytes(addr usermem.Addr, b []byte) (int, error) {
+func (*dummyCopyContext) CopyOutBytes(addr hostarch.Addr, b []byte) (int, error) {
return len(b), nil
}
-func (*dummyCopyContext) CopyInBytes(addr usermem.Addr, b []byte) (int, error) {
+func (*dummyCopyContext) CopyInBytes(addr hostarch.Addr, b []byte) (int, error) {
return len(b), nil
}
-func (t *dummyCopyContext) MarshalBytes(addr usermem.Addr, marshallable marshal.Marshallable) {
+func (t *dummyCopyContext) MarshalBytes(addr hostarch.Addr, marshallable marshal.Marshallable) {
buf := t.CopyScratchBuffer(marshallable.SizeBytes())
marshallable.MarshalBytes(buf)
t.CopyOutBytes(addr, buf)
}
-func (t *dummyCopyContext) MarshalUnsafe(addr usermem.Addr, marshallable marshal.Marshallable) {
+func (t *dummyCopyContext) MarshalUnsafe(addr hostarch.Addr, marshallable marshal.Marshallable) {
buf := t.CopyScratchBuffer(marshallable.SizeBytes())
marshallable.MarshalUnsafe(buf)
t.CopyOutBytes(addr, buf)
@@ -53,14 +53,14 @@ func (t *dummyCopyContext) MarshalUnsafe(addr usermem.Addr, marshallable marshal
//go:nosplit
func doCopyIn(t *dummyCopyContext) {
var stat test.Stat
- stat.CopyIn(t, usermem.Addr(0xf000ba12))
+ stat.CopyIn(t, hostarch.Addr(0xf000ba12))
}
// +checkescape:all
//go:nosplit
func doCopyOut(t *dummyCopyContext) {
var stat test.Stat
- stat.CopyOut(t, usermem.Addr(0xf000ba12))
+ stat.CopyOut(t, hostarch.Addr(0xf000ba12))
}
// +mustescape:builtin
@@ -70,7 +70,7 @@ func doMarshalBytesDirect(t *dummyCopyContext) {
var stat test.Stat
buf := t.CopyScratchBuffer(stat.SizeBytes())
stat.MarshalBytes(buf)
- t.CopyOutBytes(usermem.Addr(0xf000ba12), buf)
+ t.CopyOutBytes(hostarch.Addr(0xf000ba12), buf)
}
// +mustescape:builtin
@@ -80,7 +80,7 @@ func doMarshalUnsafeDirect(t *dummyCopyContext) {
var stat test.Stat
buf := t.CopyScratchBuffer(stat.SizeBytes())
stat.MarshalUnsafe(buf)
- t.CopyOutBytes(usermem.Addr(0xf000ba12), buf)
+ t.CopyOutBytes(hostarch.Addr(0xf000ba12), buf)
}
// +mustescape:local,heap
@@ -88,7 +88,7 @@ func doMarshalUnsafeDirect(t *dummyCopyContext) {
//go:nosplit
func doMarshalBytesViaMarshallable(t *dummyCopyContext) {
var stat test.Stat
- t.MarshalBytes(usermem.Addr(0xf000ba12), &stat)
+ t.MarshalBytes(hostarch.Addr(0xf000ba12), &stat)
}
// +mustescape:local,heap
@@ -96,5 +96,5 @@ func doMarshalBytesViaMarshallable(t *dummyCopyContext) {
//go:nosplit
func doMarshalUnsafeViaMarshallable(t *dummyCopyContext) {
var stat test.Stat
- t.MarshalUnsafe(usermem.Addr(0xf000ba12), &stat)
+ t.MarshalUnsafe(hostarch.Addr(0xf000ba12), &stat)
}
diff --git a/tools/go_marshal/test/marshal_test.go b/tools/go_marshal/test/marshal_test.go
index 733689c79..43bafbf96 100644
--- a/tools/go_marshal/test/marshal_test.go
+++ b/tools/go_marshal/test/marshal_test.go
@@ -27,6 +27,7 @@ import (
"unsafe"
"github.com/google/go-cmp/cmp"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/marshal"
"gvisor.dev/gvisor/pkg/marshal/primitive"
"gvisor.dev/gvisor/pkg/syserror"
@@ -47,7 +48,7 @@ func (t *mockCopyContext) populate(val interface{}) {
var buf bytes.Buffer
// Use binary.Write so we aren't testing go-marshal against its own
// potentially buggy implementation.
- if err := binary.Write(&buf, usermem.ByteOrder, val); err != nil {
+ if err := binary.Write(&buf, hostarch.ByteOrder, val); err != nil {
panic(err)
}
t.taskMem.Bytes = buf.Bytes()
@@ -71,14 +72,14 @@ func (t *mockCopyContext) CopyScratchBuffer(size int) []byte {
// CopyOutBytes implements marshal.CopyContext.CopyOutBytes. The implementation
// completely ignores the target address and stores a copy of b in its
// internally buffer, overriding any previous contents.
-func (t *mockCopyContext) CopyOutBytes(_ usermem.Addr, b []byte) (int, error) {
+func (t *mockCopyContext) CopyOutBytes(_ hostarch.Addr, b []byte) (int, error) {
return t.taskMem.CopyOut(nil, 0, b, usermem.IOOpts{})
}
// CopyInBytes implements marshal.CopyContext.CopyInBytes. The implementation
// completely ignores the source address and always fills b from the begining of
// its internal buffer.
-func (t *mockCopyContext) CopyInBytes(_ usermem.Addr, b []byte) (int, error) {
+func (t *mockCopyContext) CopyInBytes(_ hostarch.Addr, b []byte) (int, error) {
return t.taskMem.CopyIn(nil, 0, b, usermem.IOOpts{})
}
@@ -91,7 +92,7 @@ func unsafeMemory(m marshal.Marshallable) []byte {
// since the layout isn't packed. Allocate a temporary buffer
// and marshal instead.
var buf bytes.Buffer
- if err := binary.Write(&buf, usermem.ByteOrder, m); err != nil {
+ if err := binary.Write(&buf, hostarch.ByteOrder, m); err != nil {
panic(err)
}
return buf.Bytes()
@@ -130,7 +131,7 @@ func unsafeMemorySlice(m interface{}, elt marshal.Marshallable) []byte {
// since the layout isn't packed. Allocate a temporary buffer
// and marshal instead.
var buf bytes.Buffer
- if err := binary.Write(&buf, usermem.ByteOrder, m); err != nil {
+ if err := binary.Write(&buf, hostarch.ByteOrder, m); err != nil {
panic(err)
}
return buf.Bytes()
@@ -176,7 +177,7 @@ func limitedCopyIn(t *testing.T, src, dst marshal.Marshallable, limit int) {
cc.populate(src)
cc.setLimit(limit)
- n, err := dst.CopyIn(&cc, usermem.Addr(0))
+ n, err := dst.CopyIn(&cc, hostarch.Addr(0))
if n != limit {
t.Errorf("CopyIn copied unexpected number of bytes, expected %d, got %d", limit, n)
}
@@ -206,7 +207,7 @@ func limitedCopyOut(t *testing.T, src marshal.Marshallable, limit int) {
var cc mockCopyContext
cc.setLimit(limit)
- n, err := src.CopyOut(&cc, usermem.Addr(0))
+ n, err := src.CopyOut(&cc, hostarch.Addr(0))
if n != limit {
t.Errorf("CopyOut copied unexpected number of bytes, expected %d, got %d", limit, n)
}
@@ -227,7 +228,7 @@ func copyOutN(t *testing.T, src marshal.Marshallable, limit int) {
var cc mockCopyContext
cc.setLimit(limit)
- n, err := src.CopyOutN(&cc, usermem.Addr(0), limit)
+ n, err := src.CopyOutN(&cc, hostarch.Addr(0), limit)
if err != nil {
t.Errorf("CopyOut returned unexpected error: %v", err)
}
@@ -304,18 +305,18 @@ func TestLimitedMarshalling(t *testing.T) {
func TestLimitedSliceMarshalling(t *testing.T) {
types := []struct {
arrayPtrType reflect.Type
- copySliceIn func(cc marshal.CopyContext, addr usermem.Addr, dstSlice interface{}) (int, error)
- copySliceOut func(cc marshal.CopyContext, addr usermem.Addr, srcSlice interface{}) (int, error)
+ copySliceIn func(cc marshal.CopyContext, addr hostarch.Addr, dstSlice interface{}) (int, error)
+ copySliceOut func(cc marshal.CopyContext, addr hostarch.Addr, srcSlice interface{}) (int, error)
unsafeMemory func(arrPtr interface{}) []byte
}{
// Packed types.
{
reflect.TypeOf((*[20]test.Stat)(nil)),
- func(cc marshal.CopyContext, addr usermem.Addr, dst interface{}) (int, error) {
+ func(cc marshal.CopyContext, addr hostarch.Addr, dst interface{}) (int, error) {
slice := dst.(*[20]test.Stat)[:]
return test.CopyStatSliceIn(cc, addr, slice)
},
- func(cc marshal.CopyContext, addr usermem.Addr, src interface{}) (int, error) {
+ func(cc marshal.CopyContext, addr hostarch.Addr, src interface{}) (int, error) {
slice := src.(*[20]test.Stat)[:]
return test.CopyStatSliceOut(cc, addr, slice)
},
@@ -326,11 +327,11 @@ func TestLimitedSliceMarshalling(t *testing.T) {
},
{
reflect.TypeOf((*[1]test.Stat)(nil)),
- func(cc marshal.CopyContext, addr usermem.Addr, dst interface{}) (int, error) {
+ func(cc marshal.CopyContext, addr hostarch.Addr, dst interface{}) (int, error) {
slice := dst.(*[1]test.Stat)[:]
return test.CopyStatSliceIn(cc, addr, slice)
},
- func(cc marshal.CopyContext, addr usermem.Addr, src interface{}) (int, error) {
+ func(cc marshal.CopyContext, addr hostarch.Addr, src interface{}) (int, error) {
slice := src.(*[1]test.Stat)[:]
return test.CopyStatSliceOut(cc, addr, slice)
},
@@ -341,11 +342,11 @@ func TestLimitedSliceMarshalling(t *testing.T) {
},
{
reflect.TypeOf((*[5]test.SignalSetAlias)(nil)),
- func(cc marshal.CopyContext, addr usermem.Addr, dst interface{}) (int, error) {
+ func(cc marshal.CopyContext, addr hostarch.Addr, dst interface{}) (int, error) {
slice := dst.(*[5]test.SignalSetAlias)[:]
return test.CopySignalSetAliasSliceIn(cc, addr, slice)
},
- func(cc marshal.CopyContext, addr usermem.Addr, src interface{}) (int, error) {
+ func(cc marshal.CopyContext, addr hostarch.Addr, src interface{}) (int, error) {
slice := src.(*[5]test.SignalSetAlias)[:]
return test.CopySignalSetAliasSliceOut(cc, addr, slice)
},
@@ -357,11 +358,11 @@ func TestLimitedSliceMarshalling(t *testing.T) {
// Non-packed types.
{
reflect.TypeOf((*[20]test.Type1)(nil)),
- func(cc marshal.CopyContext, addr usermem.Addr, dst interface{}) (int, error) {
+ func(cc marshal.CopyContext, addr hostarch.Addr, dst interface{}) (int, error) {
slice := dst.(*[20]test.Type1)[:]
return test.CopyType1SliceIn(cc, addr, slice)
},
- func(cc marshal.CopyContext, addr usermem.Addr, src interface{}) (int, error) {
+ func(cc marshal.CopyContext, addr hostarch.Addr, src interface{}) (int, error) {
slice := src.(*[20]test.Type1)[:]
return test.CopyType1SliceOut(cc, addr, slice)
},
@@ -372,11 +373,11 @@ func TestLimitedSliceMarshalling(t *testing.T) {
},
{
reflect.TypeOf((*[1]test.Type1)(nil)),
- func(cc marshal.CopyContext, addr usermem.Addr, dst interface{}) (int, error) {
+ func(cc marshal.CopyContext, addr hostarch.Addr, dst interface{}) (int, error) {
slice := dst.(*[1]test.Type1)[:]
return test.CopyType1SliceIn(cc, addr, slice)
},
- func(cc marshal.CopyContext, addr usermem.Addr, src interface{}) (int, error) {
+ func(cc marshal.CopyContext, addr hostarch.Addr, src interface{}) (int, error) {
slice := src.(*[1]test.Type1)[:]
return test.CopyType1SliceOut(cc, addr, slice)
},
@@ -387,11 +388,11 @@ func TestLimitedSliceMarshalling(t *testing.T) {
},
{
reflect.TypeOf((*[7]test.Type8)(nil)),
- func(cc marshal.CopyContext, addr usermem.Addr, dst interface{}) (int, error) {
+ func(cc marshal.CopyContext, addr hostarch.Addr, dst interface{}) (int, error) {
slice := dst.(*[7]test.Type8)[:]
return test.CopyType8SliceIn(cc, addr, slice)
},
- func(cc marshal.CopyContext, addr usermem.Addr, src interface{}) (int, error) {
+ func(cc marshal.CopyContext, addr hostarch.Addr, src interface{}) (int, error) {
slice := src.(*[7]test.Type8)[:]
return test.CopyType8SliceOut(cc, addr, slice)
},
@@ -444,7 +445,7 @@ func TestLimitedSliceMarshalling(t *testing.T) {
cc.populate(expected)
cc.setLimit(limit)
- n, err := tt.copySliceIn(&cc, usermem.Addr(0), actual)
+ n, err := tt.copySliceIn(&cc, hostarch.Addr(0), actual)
if n != limit {
t.Errorf("CopyIn copied unexpected number of bytes, expected %d, got %d", limit, n)
}
@@ -498,7 +499,7 @@ func TestLimitedSliceMarshalling(t *testing.T) {
cc.populate(expected)
cc.setLimit(limit)
- n, err := tt.copySliceOut(&cc, usermem.Addr(0), expected)
+ n, err := tt.copySliceOut(&cc, hostarch.Addr(0), expected)
if n != limit {
t.Errorf("CopyIn copied unexpected number of bytes, expected %d, got %d", limit, n)
}
@@ -523,14 +524,14 @@ func TestDynamicTypeStruct(t *testing.T) {
var cc mockCopyContext
cc.setLimit(t12.SizeBytes())
- if _, err := t12.CopyOut(&cc, usermem.Addr(0)); err != nil {
+ if _, err := t12.CopyOut(&cc, hostarch.Addr(0)); err != nil {
t.Fatalf("cc.CopyOut faile: %v", err)
}
res := test.Type12Dynamic{
Y: make([]primitive.Int64, len(t12.Y)),
}
- res.CopyIn(&cc, usermem.Addr(0))
+ res.CopyIn(&cc, hostarch.Addr(0))
if !reflect.DeepEqual(t12, res) {
t.Errorf("dynamic type is not same after marshalling and unmarshalling: before = %+v, after = %+v", t12, res)
}
@@ -541,12 +542,12 @@ func TestDynamicTypeIdentifier(t *testing.T) {
var cc mockCopyContext
cc.setLimit(s.SizeBytes())
- if _, err := s.CopyOut(&cc, usermem.Addr(0)); err != nil {
+ if _, err := s.CopyOut(&cc, hostarch.Addr(0)); err != nil {
t.Fatalf("cc.CopyOut faile: %v", err)
}
res := test.Type13Dynamic(make([]byte, len(s)))
- res.CopyIn(&cc, usermem.Addr(0))
+ res.CopyIn(&cc, hostarch.Addr(0))
if res != s {
t.Errorf("dynamic type is not same after marshalling and unmarshalling: before = %s, after = %s", s, res)
}
diff --git a/tools/nogo/analyzers.go b/tools/nogo/analyzers.go
index 8b4bff3b6..2b3c03fec 100644
--- a/tools/nogo/analyzers.go
+++ b/tools/nogo/analyzers.go
@@ -83,11 +83,6 @@ var AllAnalyzers = []*analysis.Analyzer{
checklocks.Analyzer,
}
-// EscapeAnalyzers is a list of escape-related analyzers.
-var EscapeAnalyzers = []*analysis.Analyzer{
- checkescape.EscapeAnalyzer,
-}
-
func register(all []*analysis.Analyzer) {
// Register all fact types.
//
@@ -129,5 +124,4 @@ func init() {
// Register lists.
register(AllAnalyzers)
- register(EscapeAnalyzers)
}
diff --git a/tools/nogo/check/main.go b/tools/nogo/check/main.go
index 69bdfe502..4194770be 100644
--- a/tools/nogo/check/main.go
+++ b/tools/nogo/check/main.go
@@ -31,7 +31,6 @@ var (
stdlibFile = flag.String("stdlib", "", "stdlib configuration file (in JSON format)")
findingsOutput = flag.String("findings", "", "output file (or stdout, if not specified)")
factsOutput = flag.String("facts", "", "output file for facts (optional)")
- escapesOutput = flag.String("escapes", "", "output file for escapes (optional)")
)
func loadConfig(file string, config interface{}) interface{} {
@@ -66,25 +65,13 @@ func main() {
// Run the configuration.
if *stdlibFile != "" {
- // Perform basic analysis.
+ // Perform stdlib analysis.
c := loadConfig(*stdlibFile, new(nogo.StdlibConfig)).(*nogo.StdlibConfig)
findings, factData, err = nogo.CheckStdlib(c, nogo.AllAnalyzers)
-
} else if *packageFile != "" {
- // Perform basic analysis.
+ // Perform standard analysis.
c := loadConfig(*packageFile, new(nogo.PackageConfig)).(*nogo.PackageConfig)
findings, factData, err = nogo.CheckPackage(c, nogo.AllAnalyzers, nil)
-
- // Do we need to do escape analysis?
- if *escapesOutput != "" {
- escapes, _, err := nogo.CheckPackage(c, nogo.EscapeAnalyzers, nil)
- if err != nil {
- log.Fatalf("error performing escape analysis: %v", err)
- }
- if err := nogo.WriteFindingsToFile(escapes, *escapesOutput); err != nil {
- log.Fatalf("error writing escapes to %q: %v", *escapesOutput, err)
- }
- }
} else {
log.Fatalf("please provide at least one of package or stdlib!")
}
diff --git a/tools/nogo/defs.bzl b/tools/nogo/defs.bzl
index 0c48a7a5a..cb407a736 100644
--- a/tools/nogo/defs.bzl
+++ b/tools/nogo/defs.bzl
@@ -174,7 +174,6 @@ NogoInfo = provider(
fields = {
"facts": "serialized package facts",
"raw_findings": "raw package findings (if relevant)",
- "escapes": "escape-only findings (if relevant)",
"importpath": "package import path",
"binaries": "package binary files",
"srcs": "srcs (for go_test support)",
@@ -281,7 +280,6 @@ def _nogo_aspect_impl(target, ctx):
go_ctx = go_context(ctx, goos = nogo_target_info.goos, goarch = nogo_target_info.goarch)
facts = ctx.actions.declare_file(target.label.name + ".facts")
raw_findings = ctx.actions.declare_file(target.label.name + ".raw_findings")
- escapes = ctx.actions.declare_file(target.label.name + ".escapes")
config = struct(
ImportPath = importpath,
GoFiles = [src.path for src in srcs if src.path.endswith(".go")],
@@ -298,7 +296,7 @@ def _nogo_aspect_impl(target, ctx):
inputs.append(config_file)
ctx.actions.run(
inputs = inputs,
- outputs = [facts, raw_findings, escapes],
+ outputs = [facts, raw_findings],
tools = depset(go_ctx.runfiles.to_list() + ctx.files._nogo_objdump_tool),
executable = ctx.files._nogo_check[0],
mnemonic = "NogoAnalysis",
@@ -309,7 +307,6 @@ def _nogo_aspect_impl(target, ctx):
"-package=%s" % config_file.path,
"-findings=%s" % raw_findings.path,
"-facts=%s" % facts.path,
- "-escapes=%s" % escapes.path,
],
)
@@ -322,15 +319,16 @@ def _nogo_aspect_impl(target, ctx):
all_raw_findings = [stdlib_info.raw_findings] + depset(all_raw_findings).to_list() + [raw_findings]
# Return the package facts as output.
- return [NogoInfo(
- facts = facts,
- raw_findings = all_raw_findings,
- escapes = escapes,
- importpath = importpath,
- binaries = binaries,
- srcs = srcs,
- deps = deps,
- )]
+ return [
+ NogoInfo(
+ facts = facts,
+ raw_findings = all_raw_findings,
+ importpath = importpath,
+ binaries = binaries,
+ srcs = srcs,
+ deps = deps,
+ ),
+ ]
nogo_aspect = go_rule(
aspect,
@@ -367,7 +365,6 @@ def _nogo_test_impl(ctx):
if len(ctx.attr.deps) != 1:
fail("nogo_test requires exactly one dep.")
raw_findings = ctx.attr.deps[0][NogoInfo].raw_findings
- escapes = ctx.attr.deps[0][NogoInfo].escapes
# Build a step that applies the configuration.
config_srcs = ctx.attr.config[NogoConfigInfo].srcs
@@ -409,8 +406,6 @@ def _nogo_test_impl(ctx):
# pays attention to the mnemoic above, so this must be
# what is expected by the tooling.
nogo_findings = depset([findings]),
- # Expose all escape analysis findings (see above).
- nogo_escapes = depset([escapes]),
)]
nogo_test = rule(
@@ -432,3 +427,18 @@ nogo_test = rule(
},
test = True,
)
+
+def _nogo_aspect_tricorder_impl(target, ctx):
+ if ctx.rule.kind != "nogo_test" or OutputGroupInfo not in target:
+ return []
+ if not hasattr(target[OutputGroupInfo], "nogo_findings"):
+ return []
+ return [
+ OutputGroupInfo(tricorder = target[OutputGroupInfo].nogo_findings),
+ ]
+
+# Trivial aspect that forwards the findings from a nogo_test rule to
+# go/tricorder, which reads from the `tricorder` output group.
+nogo_aspect_tricorder = aspect(
+ implementation = _nogo_aspect_tricorder_impl,
+)